#include <algorithm>
#include <clang/AST/Type.h>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <iterator>
#include <optional>
#include <set>
#include <unordered_map>
#include <unordered_set>
#include <vector>

#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Path.h"
// Declares clang::SyntaxOnlyAction.
#include "clang/Frontend/FrontendActions.h"
#include "clang/Tooling/CommonOptionsParser.h"

#include "clang/AST/DeclVisitor.h"
#include "clang/AST/RecordLayout.h"
#include "clang/AST/RecursiveASTVisitor.h"
#include "clang/AST/StmtVisitor.h"
#include "clang/AST/TypeVisitor.h"
#include "clang/Basic/Builtins.h"
#include "clang/Basic/Diagnostic.h"
#include "clang/Basic/TargetInfo.h"
#include "clang/Basic/Version.h"
#include "clang/Frontend/CompilerInstance.h"
#if CLANG_VERSION_MAJOR < 10
#include "clang/Frontend/LangStandard.h"
#else
#include "clang/Basic/LangStandard.h"
#endif // CLANG_VERSION_MAJOR < 10
#include "clang/Tooling/Tooling.h"

#include "AstExporter.hpp"
#include "ExportResult.hpp"
#include "FloatingLexer.h"
#include "ast_tags.hpp"
#include <tinycbor/cbor.h>

using namespace llvm;
using namespace clang;
using namespace clang::tooling;

#define DEBUG_TYPE "c2rust-ast-exporter"

#ifndef LLVM_DEBUG
#define LLVM_DEBUG DEBUG
#endif

using clang::ASTContext;
using clang::QualType;
using std::string;

namespace {
// Emit `str` as a CBOR text string. No validation is performed here, so the
// caller must make sure that `str` holds valid UTF-8 encoded text.
void cbor_encode_string(CborEncoder *encoder, const std::string &str) {
    cbor_encode_text_string(encoder, str.data(), str.size());
}

// Emit `strs` as a definite-length CBOR array containing one text string per
// element. Every element is assumed to be valid UTF-8 encoded text.
void cbor_encode_string_array(CborEncoder *encoder,
                              const ArrayRef<std::string> strs) {
    CborEncoder elements;
    cbor_encoder_create_array(encoder, &elements, strs.size());

    for (size_t idx = 0, count = strs.size(); idx != count; ++idx) {
        cbor_encode_string(&elements, strs[idx]);
    }

    cbor_encoder_close_container(encoder, &elements);
}

// Resolve `path` with realpath(3) and return the result as a std::string.
// If resolution fails, a message is printed to stderr and the process aborts.
std::string make_realpath(std::string const &path) {
    // With a null buffer argument, realpath hands back a malloc'ed string
    // that has to be released with free().
    char *const resolved = realpath(path.c_str(), nullptr);
    if (resolved == nullptr) {
        std::cerr << "make_realpath: File not found: " << path << std::endl;
        abort();
    }

    std::string canonical(resolved);
    free(resolved);
    return canonical;
}

// Version shim: yields the value of `E` when it is an integer constant
// expression in `Ctx`, and an empty optional otherwise. Before Clang 12 the
// value is obtained through an out-parameter; later versions forward to
// `Expr::getIntegerConstantExpr`. From Clang 17 on, this helper returns
// `std::optional` instead of `Optional`.
#if CLANG_VERSION_MAJOR < 12
Optional<APSInt> getIntegerConstantExpr(const Expr &E, const ASTContext &Ctx) {
    APSInt value;
    if (!E.isIntegerConstantExpr(value, Ctx))
        return Optional<APSInt>();
    return Optional<APSInt>(value);
}
#elif CLANG_VERSION_MAJOR < 17
Optional<APSInt> getIntegerConstantExpr(const Expr &E, const ASTContext &Ctx) {
    return E.getIntegerConstantExpr(Ctx);
}
#else
std::optional<APSInt> getIntegerConstantExpr(const Expr &E,
                                             const ASTContext &Ctx) {
    return E.getIntegerConstantExpr(Ctx);
}
#endif // CLANG_VERSION_MAJOR
} // namespace

/// Start a diagnostic of severity `Lvl` at `Loc`.
///
/// The diagnostic uses a custom ID whose format string is "c2rust: %0". The
/// `c2rust` prefix lets the user distinguish our messages from those
/// generated by clang itself; the caller supplies the `%0` argument on the
/// returned builder.
DiagnosticBuilder getDiagBuilder(ASTContext *Context,
                                 SourceLocation Loc,
                                 DiagnosticsEngine::Level Lvl) {
    DiagnosticsEngine &Diags = Context->getDiagnostics();
    return Diags.Report(Loc, Diags.getCustomDiagID(Lvl, "c2rust: %0"));
}

/// Report `Message` through `Context`'s diagnostics engine.
///
/// \param Lvl     severity of the diagnostic
/// \param Message text substituted for the `%0` argument of the diagnostic
/// \param S       location the diagnostic is reported at
/// \param R       source range attached to the diagnostic (as a character
///                range); defaults to an empty `SourceRange`
void printDiag(ASTContext *Context,
               DiagnosticsEngine::Level Lvl,
               std::string Message,
               SourceLocation S,
               SourceRange R = SourceRange()) {
    DiagnosticBuilder Builder = getDiagBuilder(Context, S, Lvl);
    Builder.AddString(Message);

    const CharSourceRange Highlight = CharSourceRange::getCharRange(R);
    Builder.AddSourceRange(Highlight);
}

/// Location used when reporting on a declaration: `Decl::getLocation()`.
SourceLocation getSourceLocation(const Decl *D) { return D->getLocation(); }

/// Location used when reporting on an expression: `Expr::getExprLoc()`.
SourceLocation getSourceLocation(const Expr *E) { return E->getExprLoc(); }

/// Location used when reporting on a statement: its begin location. The
/// accessor is spelled `getLocStart` before Clang 8 and `getBeginLoc` from
/// Clang 8 on.
SourceLocation getSourceLocation(const Stmt *S) {
#if CLANG_VERSION_MAJOR >= 8
    return S->getBeginLoc();
#else
    return S->getLocStart();
#endif
}

/// Convenience overload: report `Message` at the location of the AST node `t`
/// (usually a `Decl`, `Expr`, or `Stmt`), attaching the node's source range.
/// If the node has no valid location, the node is dumped before reporting.
template <class T>
void printDiag(ASTContext *Context, DiagnosticsEngine::Level Lvl, std::string Message, const T *t) {
    const SourceLocation where = getSourceLocation(t);
    if (!where.isValid()) {
        t->dump();
    }
    printDiag(Context, Lvl, Message, where, t->getSourceRange());
}

class TranslateASTVisitor;

class TypeEncoder final : public TypeVisitor<TypeEncoder> {
    ASTContext *Context;
    CborEncoder *encoder;
    std::unordered_map<void *, QualType> *sugared;
    TranslateASTVisitor *astEncoder;

    // Bounds recursion when visiting self-referential record declarations
    std::unordered_set<const clang::RecordDecl *> recordDeclsUnderVisit;

    std::unordered_set<const clang::Type *> exports;

public:
    /// Set before `TypeEncoder::VisitQualType(ty)` in `TypeEncoder::VisitQualTypeOf`.
    SourceLocation src_loc;
    SourceRange src_range;

private:
    /// Add `ptr` to the set of exported types. Returns true when `ptr` was
    /// newly added and false when it was already in the set.
    bool markExported(const clang::Type *ptr) {
        const auto inserted = exports.insert(ptr);
        return inserted.second;
    }

    /// Returns true when `ptr` is already in the set of exported types.
    bool isExported(const clang::Type *ptr) {
        return exports.count(ptr) != 0;
    }

    /// Write the entry for type `T` to `encoder` as an indefinite-length CBOR
    /// array, unless `T` has been exported before (in which case nothing is
    /// written and `extra` is not invoked).
    ///
    /// \param T     type being exported; its address serves as the entity ID
    /// \param tag   type tag stored as the second element
    /// \param extra callback that appends any further elements to the entry
    void encodeType(
        const clang::Type *T, TypeTag tag,
        std::function<void(CborEncoder *)> extra = [](CborEncoder *) {}) {
        const bool firstExport = markExported(T);
        if (firstExport) {
            CborEncoder entry;
            cbor_encoder_create_array(encoder, &entry, CborIndefiniteLength);

            cbor_encode_uint(&entry, uintptr_t(T)); // 1 - Entity ID
            cbor_encode_uint(&entry, tag);          // 2 - Type tag
            extra(&entry);                          // 3 - extras

            cbor_encoder_close_container(encoder, &entry);
        }
    }

  public:
    /// Compute the numeric ID used to refer to the qualified type `t`.
    ///
    /// If the underlying type has an entry in `sugared`, the ID of the mapped
    /// type is returned instead. Otherwise the ID is the address of the
    /// underlying type with the qualifiers folded into the low bits:
    /// bit 0 = const, bit 1 = restrict, bit 2 = volatile.
    uintptr_t encodeQualType(QualType t) {
        const auto parts = t.split();

        const auto redirect = sugared->find((void *)parts.Ty);
        if (redirect != sugared->end())
            return encodeQualType(redirect->second);

        uintptr_t qualifierBits = 0;
        if (t.isConstQualified())
            qualifierBits |= 1;
        if (t.isRestrictQualified())
            qualifierBits |= 2;
        if (t.isVolatileQualified())
            qualifierBits |= 4;

        return uintptr_t(parts.Ty) | qualifierBits;
    }

    /// Store the given pointers; nothing is encoded at construction time.
    ///
    /// \param Context AST context, used for diagnostics and type queries
    /// \param encoder CBOR encoder that type entries are written to
    /// \param sugared map consulted by `encodeQualType` and `VisitQualType`
    ///                to replace a type with its mapped type
    /// \param ast     the AST translator this type encoder belongs to
    explicit TypeEncoder(ASTContext *Context, CborEncoder *encoder,
                         std::unordered_map<void *, QualType> *sugared,
                         TranslateASTVisitor *ast)
        : Context(Context),
          encoder(encoder),
          sugared(sugared),
          astEncoder(ast) {
    }

    /// Visit `QT` on behalf of the AST node `t` (usually a `Decl`, `Expr`, or
    /// `Stmt`). The node's location and source range are stored in `src_loc`
    /// and `src_range` before the visit starts.
    template <class T>
    void VisitQualTypeOf(const QualType &QT, const T *t) {
        src_range = t->getSourceRange();
        src_loc = getSourceLocation(t);
        VisitQualType(QT);
    }

    void VisitQualType(const QualType &QT) {
        if (!QT.isNull()) {
            auto s = QT.split();

            auto desugared = sugared->find((void *)s.Ty);
            if (desugared != sugared->end())
                VisitQualType(desugared->second);
            else if (!isExported(s.Ty)) {
                Visit(s.Ty);
            }
        }
    }

    void VisitAttributedType(const AttributedType *T) {
        auto t = T->getModifiedType();
        auto qt = encodeQualType(t);
        auto k = T->getAttrKind();

        encodeType(T, TagAttributedType, [qt, k](CborEncoder *local) {
            cbor_encode_uint(local, qt);

            const char *tag;
            switch (k) {
            default: tag = nullptr; break;

#if CLANG_VERSION_MAJOR < 8
            case AttributedType::attr_noreturn:
#else
            case attr::NoReturn:
#endif // CLANG_VERSION_MAJOR
                tag = "noreturn";
                break;

#if CLANG_VERSION_MAJOR < 8
            case AttributedType::attr_nonnull:
#else
            case attr::TypeNonNull:
#endif // CLANG_VERSION_MAJOR
                tag = "notnull";
                break;

#if CLANG_VERSION_MAJOR < 8
            case AttributedType::attr_nullable:
#else
            case attr::TypeNullable:
#endif // CLANG_VERSION_MAJOR
                tag = "nullable";
                break;
            }
            if (tag) {
                cbor_encode_text_stringz(local, tag);
            } else {
                cbor_encode_null(local);
            }
        });

        VisitQualType(t);
    }

    void VisitParenType(const ParenType *T) {
        auto t = T->getInnerType();
        auto qt = encodeQualType(t);

        encodeType(T, TagParenType,
                   [qt](CborEncoder *local) { cbor_encode_uint(local, qt); });

        VisitQualType(t);
    }

    void VisitEnumType(const EnumType *T);

    void VisitConstantArrayType(const ConstantArrayType *T) {
        auto t = T->getElementType();
        auto qt = encodeQualType(t);

        encodeType(T, TagConstantArrayType, [T, qt](CborEncoder *local) {
            cbor_encode_uint(local, qt);
            cbor_encode_uint(local, T->getSize().getLimitedValue());
        });

        VisitQualType(t);
    }

    void VisitVariableArrayType(const VariableArrayType *T);

    void VisitAtomicType(const AtomicType *AT);

    void VisitIncompleteArrayType(const IncompleteArrayType *T) {
        auto t = T->getElementType();
        auto qt = encodeQualType(t);

        encodeType(T, TagIncompleteArrayType,
                   [qt](CborEncoder *local) { cbor_encode_uint(local, qt); });

        VisitQualType(t);
    }

    void VisitBlockPointerType(const BlockPointerType *T) {
        auto t = T->getPointeeType();
        auto qt = encodeQualType(t);

        encodeType(T, TagBlockPointer,
                   [qt](CborEncoder *local) { cbor_encode_uint(local, qt); });

        VisitQualType(t);
    }

    // definition below due to recursive call into AST translator
    void VisitRecordType(const RecordType *T);

    /// Emit a `TagVectorType` entry holding the element type ID and the
    /// number of elements, then visit the element type.
    void VisitVectorType(const clang::VectorType *T) {
        const auto element = T->getElementType();
        const auto elementId = encodeQualType(element);

        auto writeVector = [T, elementId](CborEncoder *out) {
            cbor_encode_uint(out, elementId);
            cbor_encode_uint(out, T->getNumElements());
        };
        encodeType(T, TagVectorType, writeVector);

        VisitQualType(element);
    }

    void VisitComplexType(const ComplexType *T) {
        auto t = T->getElementType();
        auto qt = encodeQualType(t);

        encodeType(T, TagComplexType,
                   [qt](CborEncoder *local) { cbor_encode_uint(local, qt); });

        VisitQualType(t);
    }

    /// Emit the entry for a builtin type.
    ///
    /// On Clang >= 10, kinds in the range `SveInt8..SveBool` (and, on
    /// Clang >= 13, `RvvInt8mf8..RvvBool64`) are emitted as `TagVectorType`
    /// entries carrying an element type ID and an element count, and the
    /// element type is visited afterwards. Every other kind is mapped to a
    /// tag by the switch below and emitted without extra fields. A kind the
    /// switch does not know produces a warning diagnostic at
    /// `src_loc`/`src_range` and is emitted as `TagTypeUnknown`.
    void VisitBuiltinType(const BuiltinType *T) {
        auto kind = T->getKind();

#if CLANG_VERSION_MAJOR >= 10
        // Handle built-in vector types as if they're normal vector types
        if (kind >= BuiltinType::SveInt8 && kind <= BuiltinType::SveBool
#if CLANG_VERSION_MAJOR >= 13
            /* RISC-V vector types */
            || kind >= BuiltinType::RvvInt8mf8 && kind <= BuiltinType::RvvBool64
#endif // CLANG_VERSION_MAJOR >= 13
            ) {
// Declare ElemType and ElemCount as needed by various Clang versions
#if CLANG_VERSION_MAJOR >= 11
            auto Info = Context->getBuiltinVectorTypeInfo(T);
            auto ElemType = Info.ElementType;

#if CLANG_VERSION_MAJOR >= 12
            auto ElemCount = Info.EC.getKnownMinValue() * Info.NumVectors;
#else // CLANG_VERSION_MAJOR >= 12
            // getKnownMinValue was added in Clang 12.
            auto ElemCount = Info.EC.Min * Info.NumVectors;
#endif // CLANG_VERSION_MAJOR >= 12
#else // CLANG_VERSION_MAJOR >= 11
            auto &Ctx = *Context;
            // Copy-pasted from Type::getSveEltType introduced after Clang 10:
            // (Not extended for RISCV types
            // as they are not available in that version anyway).
            auto ElemType = [&] {
                switch (kind) {
                default: llvm_unreachable("Unknown builtin SVE type!");
                case BuiltinType::SveInt8: return Ctx.SignedCharTy;
                case BuiltinType::SveUint8: return Ctx.UnsignedCharTy;
                case BuiltinType::SveBool: return Ctx.UnsignedCharTy;
                case BuiltinType::SveInt16: return Ctx.ShortTy;
                case BuiltinType::SveUint16: return Ctx.UnsignedShortTy;
                case BuiltinType::SveInt32: return Ctx.IntTy;
                case BuiltinType::SveUint32: return Ctx.UnsignedIntTy;
                case BuiltinType::SveInt64: return Ctx.LongTy;
                case BuiltinType::SveUint64: return Ctx.UnsignedLongTy;
                case BuiltinType::SveFloat16: return Ctx.Float16Ty;
                case BuiltinType::SveFloat32: return Ctx.FloatTy;
                case BuiltinType::SveFloat64: return Ctx.DoubleTy;
                }
            }();
            // All the SVE types present in Clang 10 are 128-bit vectors
            // (see `AArch64SVEACLETypes.def`), so we can divide 128
            // by their element size to get element count.
            auto ElemCount = 128 / Context->getTypeSize(ElemType);
#endif // CLANG_VERSION_MAJOR >= 11
            // The element type ID is computed up front; the entry itself is
            // only written if this builtin type has not been exported yet.
            auto ElemTypeTag = encodeQualType(ElemType);
            encodeType(T, TagVectorType,
                       [&](CborEncoder *local) {
                           cbor_encode_uint(local, ElemTypeTag);
                           cbor_encode_uint(local, ElemCount);
                       });

            VisitQualType(ElemType);
            return;
        }
#endif // CLANG_VERSION_MAJOR >= 10

        // Map the remaining builtin kinds to their type tag.
        const TypeTag tag = [&] {
            switch (kind) {
            case BuiltinType::BuiltinFn: return TagBuiltinFn;
            case BuiltinType::UInt128: return TagUInt128;
            case BuiltinType::Int128: return TagInt128;
            case BuiltinType::Short: return TagShort;
            case BuiltinType::Int: return TagInt;
            case BuiltinType::Long: return TagLong;
            case BuiltinType::LongLong: return TagLongLong;
            case BuiltinType::UShort: return TagUShort;
            case BuiltinType::UInt: return TagUInt;
            case BuiltinType::ULong: return TagULong;
            case BuiltinType::ULongLong: return TagULongLong;
            // Constructed as a consequence of the conversion of
            // built-in to normal vector types.
            case BuiltinType::Float16: return TagHalf;
            case BuiltinType::Half: return TagHalf;

#if CLANG_VERSION_MAJOR >= 11
            case BuiltinType::BFloat16: return TagBFloat16;
#endif

            case BuiltinType::Float: return TagFloat;
            case BuiltinType::Double: return TagDouble;
            case BuiltinType::LongDouble: return TagLongDouble;
            case BuiltinType::SChar: return TagSChar;
            case BuiltinType::UChar: return TagUChar;
            case BuiltinType::Char_U: return TagChar;
            case BuiltinType::Char_S: return TagChar;
            case BuiltinType::Void: return TagVoid;
            case BuiltinType::Bool: return TagBool;
            case BuiltinType::WChar_S: return TagSWChar;
            case BuiltinType::WChar_U: return TagUWChar;

#if CLANG_VERSION_MAJOR >= 17
            case BuiltinType::Float128: return TagFloat128;

            // From `clang/include/clang/Basic/AArch64ACLETypes.def`,
            // but we can't `#include` it in an external clang tool.
            case BuiltinType::SveInt8:
            case BuiltinType::SveInt16:
            case BuiltinType::SveInt32:
            case BuiltinType::SveInt64:
            case BuiltinType::SveUint8:
            case BuiltinType::SveUint16:
            case BuiltinType::SveUint32:
            case BuiltinType::SveUint64:
            case BuiltinType::SveFloat16:
            case BuiltinType::SveFloat32:
            case BuiltinType::SveFloat64:
            case BuiltinType::SveBFloat16:
            case BuiltinType::SveInt8x2:
            case BuiltinType::SveInt16x2:
            case BuiltinType::SveInt32x2:
            case BuiltinType::SveInt64x2:
            case BuiltinType::SveUint8x2:
            case BuiltinType::SveUint16x2:
            case BuiltinType::SveUint32x2:
            case BuiltinType::SveUint64x2:
            case BuiltinType::SveFloat16x2:
            case BuiltinType::SveFloat32x2:
            case BuiltinType::SveFloat64x2:
            case BuiltinType::SveBFloat16x2:
            case BuiltinType::SveInt8x3:
            case BuiltinType::SveInt16x3:
            case BuiltinType::SveInt32x3:
            case BuiltinType::SveInt64x3:
            case BuiltinType::SveUint8x3:
            case BuiltinType::SveUint16x3:
            case BuiltinType::SveUint32x3:
            case BuiltinType::SveUint64x3:
            case BuiltinType::SveFloat16x3:
            case BuiltinType::SveFloat32x3:
            case BuiltinType::SveFloat64x3:
            case BuiltinType::SveBFloat16x3:
            case BuiltinType::SveInt8x4:
            case BuiltinType::SveInt16x4:
            case BuiltinType::SveInt32x4:
            case BuiltinType::SveInt64x4:
            case BuiltinType::SveUint8x4:
            case BuiltinType::SveUint16x4:
            case BuiltinType::SveUint32x4:
            case BuiltinType::SveUint64x4:
            case BuiltinType::SveFloat16x4:
            case BuiltinType::SveFloat32x4:
            case BuiltinType::SveFloat64x4:
            case BuiltinType::SveBFloat16x4:
            case BuiltinType::SveBool:
            case BuiltinType::SveBoolx2:
            case BuiltinType::SveBoolx4:
            case BuiltinType::SveCount:
#endif // CLANG_VERSION_MAJOR >= 17

#if CLANG_VERSION_MAJOR >= 20
            case BuiltinType::MFloat8:
            case BuiltinType::SveMFloat8:
            case BuiltinType::SveMFloat8x2:
            case BuiltinType::SveMFloat8x3:
            case BuiltinType::SveMFloat8x4:
#endif // CLANG_VERSION_MAJOR >= 20
                // Shared result of the SVE (and, on Clang >= 20, MFloat8)
                // case labels above, which all fall through to here.
                return TagSve;

            default:
                // Unknown kind: warn, naming the numeric kind and the printed
                // type name, and export the type as unknown.
                auto pol = clang::PrintingPolicy(Context->getLangOpts());
                auto warning = std::string("Encountered unsupported BuiltinType kind ") +
                               std::to_string((int)kind) + " for type " +
                               T->getName(pol).str();
                printDiag(Context, DiagnosticsEngine::Warning, warning, src_loc, src_range);
                return TagTypeUnknown;
            }
        }();

        encodeType(T, tag);
    }

    // Clang represents function declarations with parameters as
    // `FunctionProtoType` instances whereas functions w/o parameters are
    // handled as `FunctionNoPrototype` instances. Note: we could handle both
    // cases by overriding `VisitFunctionType` instead of the current
    // two-function solution.
    void VisitFunctionProtoType(const FunctionProtoType *T) {
        LLVM_DEBUG(dbgs() << "Visit ");
        LLVM_DEBUG(T->dump());

        encodeType(T, TagFunctionType, [T, this](CborEncoder *local) {
            CborEncoder arrayEncoder;

            // Function types are encoded with an extra list of types. The
            // return type is always the first element of the list followed by
            // the parameters.
            size_t elts = T->getNumParams() + 1;
            cbor_encoder_create_array(local, &arrayEncoder, elts);

            cbor_encode_uint(&arrayEncoder, encodeQualType(T->getReturnType()));
            for (auto t : T->param_types()) {
                cbor_encode_uint(&arrayEncoder, encodeQualType(t));
            }

            cbor_encoder_close_container(local, &arrayEncoder);

            cbor_encode_boolean(local, T->getExtProtoInfo().Variadic);
            cbor_encode_boolean(local, T->getNoReturnAttr());
            cbor_encode_boolean(local, true); // has arguments
        });

        VisitQualType(T->getReturnType());
        for (auto x : T->param_types()) {
            VisitQualType(x);
        }
    }

    // See `VisitFunctionProtoType`.
    void VisitFunctionNoProtoType(const FunctionNoProtoType *T) {
        encodeType(T, TagFunctionType, [T](CborEncoder *local) {
            CborEncoder arrayEncoder;

            cbor_encoder_create_array(local, &arrayEncoder, 1);

            cbor_encode_uint(&arrayEncoder,
                             uintptr_t(T->getReturnType().getTypePtrOrNull()));

            cbor_encoder_close_container(local, &arrayEncoder);

            cbor_encode_boolean(local, false); // Variable argument function
            cbor_encode_boolean(local, T->getNoReturnAttr());
            cbor_encode_boolean(local, false); // has arguments
        });

        VisitQualType(T->getReturnType());
    }

    /// Emit a `TagPointer` entry whose only extra field is the pointee type
    /// ID, then visit the pointee type.
    void VisitPointerType(const clang::PointerType *T) {
        const auto target = T->getPointeeType();
        const auto targetId = encodeQualType(target);

        auto writeTarget = [targetId](CborEncoder *out) {
            cbor_encode_uint(out, targetId);
        };
        encodeType(T, TagPointer, writeTarget);

        VisitQualType(target);
    }

    // C itself has no references, but Clang's built-in functions for
    // `va_start`, `va_end`, etc. may use C++ references in 32-bit mode, so
    // reference types still have to be handled.
    //
    // Emits a `TagReference` entry whose only extra field is the pointee type
    // ID, then visits the pointee type.
    void VisitReferenceType(const clang::ReferenceType *T) {
        const auto target = T->getPointeeType();
        const auto targetId = encodeQualType(target);

        auto writeTarget = [targetId](CborEncoder *out) {
            cbor_encode_uint(out, targetId);
        };
        encodeType(T, TagReference, writeTarget);

        VisitQualType(target);
    }

    void VisitTypedefType(const TypedefType *T);

    void VisitTypeOfType(const TypeOfType *T) {
        auto t = T->desugar();
        auto qt = encodeQualType(t);
        encodeType(T, TagTypeOfType,
                   [qt](CborEncoder *local) { cbor_encode_uint(local, qt); });
        VisitQualType(t);
    }

    void VisitTypeOfExprType(const TypeOfExprType *T) {
        auto t = T->desugar();
        auto qt = encodeQualType(t);
        encodeType(T, TagTypeOfType,
                   [qt](CborEncoder *local) { cbor_encode_uint(local, qt); });
        VisitQualType(t);
    }

    void VisitElaboratedType(const ElaboratedType *T) {
        auto t = T->desugar();
        auto qt = encodeQualType(t);
        encodeType(T, TagElaboratedType,
                   [qt](CborEncoder *local) { cbor_encode_uint(local, qt); });

        VisitQualType(t);
    }

    void VisitDecayedType(const DecayedType *T) {
        auto t = T->desugar();
        auto qt = encodeQualType(t);
        encodeType(T, TagDecayedType,
                   [qt](CborEncoder *local) { cbor_encode_uint(local, qt); });

        VisitQualType(t);
    }
};

class TranslateASTVisitor final
    : public RecursiveASTVisitor<TranslateASTVisitor> {

    // Per-macro record; values of this type are stored in the `macros` map
    // declared below, keyed by `MacroInfo *`.
    struct MacroExpansionInfo {
        // NOTE(review): presumably the macro's name. `StringRef` does not own
        // its data, so the backing storage must outlive this record — confirm
        // where `macros` is populated.
        StringRef Name;
    };

    ASTContext *Context;
    TypeEncoder typeEncoder;
    CborEncoder *encoder;
    Preprocessor &PP;
    std::vector<std::pair<string, SourceLocation>> files;
    // Mapping from SourceManager FileID to index in files
    DenseMap<FileID, size_t> file_id_mapping;
    std::set<std::pair<void *, ASTEntryTag>> exportedTags;
    std::unordered_map<MacroInfo*, MacroExpansionInfo> macros;

    // This stores a raw encoding of the macro call site SourceLocation, since
    // SourceLocation isn't hashable.
    std::unordered_set<unsigned> macroCallSites;
    SmallVector<MacroInfo*, 1> curMacroExpansionStack;
    StringRef curMacroExpansionSource;

    // Record the (`ptr`, `tag`) pair in `exportedTags`. Returns true when the
    // pair was newly added and false when it was already present.
    bool markForExport(void *ptr, ASTEntryTag tag) {
        const auto inserted = exportedTags.insert(std::make_pair(ptr, tag));
        return inserted.second;
    }

    // Returns true when the (`ptr`, `tag`) pair is already in `exportedTags`.
    bool isExported(void *ptr, ASTEntryTag tag) {
        return exportedTags.count(std::make_pair(ptr, tag)) != 0;
    }

    // Try to compute the integer value of `E`, storing it in `constant`.
    //
    // `E` is first treated as an integer constant expression; if that yields
    // no value, `Expr::EvaluateAsInt` is used as a fallback. Returns true
    // when a value was obtained. `E` must not be null.
    bool evaluateConstantInt(Expr *E, APSInt &constant) {
        assert(E != nullptr);

        auto iceValue = getIntegerConstantExpr(*E, *Context);
        if (iceValue) {
            constant = *iceValue;
            return true;
        }

        // Fallback path. Before Clang 8 `EvaluateAsInt` fills in an `APSInt`
        // directly; from Clang 8 on it fills in an `Expr::EvalResult`.
#if CLANG_VERSION_MAJOR < 8
        APSInt folded;
        const bool hasValue = E->EvaluateAsInt(folded, *Context);
        constant = folded;
#else
        Expr::EvalResult folded;
        const bool hasValue = E->EvaluateAsInt(folded, *Context);
        if (hasValue) {
            constant = folded.Val.getInt();
        }
#endif // CLANG_VERSION_MAJOR
        return hasValue;
    }

    // Template required because Decl and Stmt don't share a common base class
    void encode_entry_raw(void *ast, ASTEntryTag tag, SourceRange loc,
                          const QualType ty, bool rvalue,
                          bool isVaList, bool encodeMacroExpansions,
                          const std::vector<void *> &childIds,
                          std::function<void(CborEncoder *)> extra) {
        if (!markForExport(ast, tag))
            return;

        CborEncoder local, childEnc;
        cbor_encoder_create_array(encoder, &local, CborIndefiniteLength);

        // 0 - Entry ID
        cbor_encode_uint(&local, uintptr_t(ast));

        // 1 - Entry Tag
        cbor_encode_uint(&local, tag);

        // 2 - Entry Children
        cbor_encoder_create_array(&local, &childEnc, childIds.size());
        for (auto x : childIds) {
            if (x == nullptr) {
                cbor_encode_null(&childEnc);
            } else {
                cbor_encode_uint(&childEnc, uintptr_t(x));
            }
        }
        cbor_encoder_close_container(&local, &childEnc);

        // 3 - File number
        // 4 - Begin Line number
        // 5 - Begin Column number
        // 6 - End Line number
        // 7 - End Column number
        encodeSourceSpan(&local, loc, isVaList);

        // 8 - Type ID (only for expressions)
        encode_qualtype(&local, ty);

        // 9 - Is Rvalue (only for expressions)
        cbor_encode_boolean(&local, rvalue);

        // 10 - Macro expansion stack, starting with initial macro call and ending
        // with the innermost replacement.
        cbor_encoder_create_array(&local, &childEnc,
                                  encodeMacroExpansions ? curMacroExpansionStack.size() : 0);
        if (encodeMacroExpansions) {
            for (auto I = curMacroExpansionStack.rbegin(), E = curMacroExpansionStack.rend();
                 I != E; ++I) {
                cbor_encode_uint(&childEnc, uintptr_t(*I));
            }
        }
        cbor_encoder_close_container(&local, &childEnc);

        // 11 - Macro expansion source string, if applicable.
        if (!curMacroExpansionSource.empty()) {
            cbor_encode_string(&local, curMacroExpansionSource.str());
        } else {
            cbor_encode_null(&local);
        }

        // 12.. - Extra entries
        extra(&local);

        cbor_encoder_close_container(encoder, &local);
    }

    // Write the ID of `ty` to `enc`, or CBOR null when `ty` has no
    // underlying type.
    void encode_qualtype(CborEncoder *enc, QualType ty) {
        if (ty.getTypePtrOrNull() == nullptr) {
            cbor_encode_null(enc);
            return;
        }
        cbor_encode_uint(enc, typeEncoder.encodeQualType(ty));
    }

    // Export an expression: writes its entry with the expression's type,
    // rvalue flag and macro expansion stack, then exports the type itself
    // through `typeEncoder`.
    void encode_entry(
        Expr *ast, ASTEntryTag tag, const std::vector<void *> &childIds,
        std::function<void(CborEncoder *)> extra = [](CborEncoder *) {}) {
#if CLANG_VERSION_MAJOR < 13
        const bool isRValue = ast->isRValue();
#else
        // prvalues are equivalent to rvalues in C++03.
        //
        // NOTE: We used to call ast->Classify(*Context).isRValue() but that may
        // result in a segfault on LLVM 18 and 19 for certain string literals.
        // See https://github.com/immunant/c2rust/issues/1124
        const bool isRValue = ast->getValueKind() == VK_PRValue;
#endif
        const auto exprType = ast->getType();

        encode_entry_raw(ast, tag, ast->getSourceRange(), exprType, isRValue,
                         /*isVaList=*/false, /*encodeMacroExpansions=*/true,
                         childIds, extra);
        typeEncoder.VisitQualTypeOf(exprType, ast);
    }

    /// Encode a statement node.
    ///
    /// Statements have no type, so a QualType wrapping a null type pointer is
    /// passed along; the rvalue, va_list and macro-expansion flags are all
    /// false.
    void encode_entry(
        Stmt *ast, ASTEntryTag tag, const std::vector<void *> &childIds,
        std::function<void(CborEncoder *)> extra = [](CborEncoder *) {}) {
        const QualType noType(static_cast<clang::Type *>(nullptr), 0);
        encode_entry_raw(ast, tag, ast->getSourceRange(), noType,
                         /*rvalue=*/false, /*isVaList=*/false,
                         /*encodeMacroExpansions=*/false, childIds, extra);
    }

    /// Encode a declaration node at the declaration's own source range.
    ///
    /// `T` is the type recorded for the entry; it is also what the va_list
    /// check is run against. Declarations are never rvalues and do not carry
    /// macro-expansion data.
    void encode_entry(
        Decl *ast, ASTEntryTag tag, const std::vector<void *> &childIds,
        const QualType T,
        std::function<void(CborEncoder *)> extra = [](CborEncoder *) {}) {
        const bool declIsVaList = isVaList(ast, T);
        encode_entry_raw(ast, tag, ast->getSourceRange(), T, /*rvalue=*/false,
                         declIsVaList, /*encodeMacroExpansions=*/false,
                         childIds, extra);
    }

    /// Encode a declaration node at a caller-supplied source range.
    ///
    /// Use this overload when the definition location differs from the
    /// canonical declaration location and `loc` should be reported instead of
    /// the declaration's own range. Apart from the range it behaves like the
    /// other declaration overload: not an rvalue, no macro-expansion data, and
    /// the va_list flag is derived from `ast` and `T`.
    void encode_entry(
        Decl *ast, ASTEntryTag tag, SourceRange loc,
        const std::vector<void *> &childIds, const QualType T,
        std::function<void(CborEncoder *)> extra = [](CborEncoder *) {}) {
        const bool declIsVaList = isVaList(ast, T);
        encode_entry_raw(ast, tag, loc, T, /*rvalue=*/false, declIsVaList,
                         /*encodeMacroExpansions=*/false, childIds, extra);
    }

    /// Find the macro whose name is spelled at `loc`.
    ///
    /// The raw token at the spelling location of `loc` is lexed and resolved
    /// to an identifier. If that identifier has ever had a macro definition,
    /// the definition in effect just before the expansion location of `loc`
    /// is looked up.
    ///
    /// \param loc   location of the (potential) macro name.
    /// \param name  set to the identifier's name on success; left untouched
    ///              otherwise.
    /// \return the macro's MacroInfo, or nullptr if the token could not be
    ///         lexed, is not an identifier with a macro history, or has no
    ///         definition at that point.
    MacroInfo* getMacroInfo(SourceLocation loc, StringRef &name) const {
        auto &Mgr = Context->getSourceManager();
        Token Result;
        // A false return from getRawToken is the success path here.
        if (Lexer::getRawToken(Mgr.getSpellingLoc(loc), Result,
                               Mgr, Context->getLangOpts(), false))
            return nullptr;

        if (Result.is(tok::raw_identifier)) {
            PP.LookUpIdentifierInfo(Result);
        }
        IdentifierInfo *II = Result.getIdentifierInfo();
        if (!II || !II->hadMacroDefinition())
            return nullptr;

        std::pair<FileID, unsigned int> DecLoc =
            Mgr.getDecomposedExpansionLoc(loc);
        // Get the definition just before the searched location so that a
        // macro referenced in a '#undef MACRO' can still be found. Only step
        // back when there is a preceding character: at offset 0 the unsigned
        // subtraction would wrap and yield a location before the file start.
        const unsigned int OffsetBefore =
            DecLoc.second > 0 ? DecLoc.second - 1 : 0;
        SourceLocation BeforeSearchedLocation =
            Mgr.getMacroArgExpandedLocation(
                Mgr.getLocForStartOfFile(DecLoc.first)
                    .getLocWithOffset(OffsetBefore));
        MacroDefinition MacroDef =
            PP.getMacroDefinitionAtLoc(II, BeforeSearchedLocation);
        MacroInfo *MacroInf = MacroDef.getMacroInfo();
        if (!MacroInf)
            return nullptr;

        LLVM_DEBUG(dbgs() << II->getName() << "\n");
        LLVM_DEBUG(MacroInf->dump());
        LLVM_DEBUG(dbgs() << "\n");
        name = II->getName();
        return MacroInf;
    }

    /// Decide whether expression `E` should be associated with macro `mac`
    /// expanded at call site `loc`.
    ///
    /// Returns false for builtin macros, for call sites that were already
    /// recorded, and when `mac` was previously recorded under a different
    /// name. Otherwise the macro's name is recorded if it had none, the type
    /// of `E` is handed to the type encoder, and true is returned.
    bool VisitMacro(StringRef name, SourceLocation loc, MacroInfo *mac, Expr *E) {
        // TODO: handle builtin macros
        if (mac->isBuiltinMacro())
            return false;

        // A call site seen before means this expression is a subexpression of
        // one that was already associated with the macro.
        const bool firstVisit =
            macroCallSites.insert(loc.getRawEncoding()).second;
        if (!firstVisit)
            return false;

        auto &expansion = macros[mac];
        if (expansion.Name.empty()) {
            expansion.Name = name;
        } else if (expansion.Name != name) {
            return false;
        }

        typeEncoder.VisitQualTypeOf(E->getType(), E);
        return true;
    }

    /// Report whether the canonical form of `ty` is one of the type classes
    /// treated as scalar for inline-asm operands: builtin, pointer, vector,
    /// ext-vector, function (with or without prototype) or enum. Atomic types
    /// are classified by their value type.
    static bool isScalarAsmType(QualType ty) {
        for (;;) {
            ty = ty.getCanonicalType();
            switch (ty->getTypeClass()) {
            case clang::Type::Atomic:
                // Peel the atomic wrapper and classify what is inside.
                ty = cast<AtomicType>(ty)->getValueType();
                continue;

            case clang::Type::Builtin:
            case clang::Type::Pointer:
            case clang::Type::Vector:
            case clang::Type::ExtVector:
            case clang::Type::FunctionProto:
            case clang::Type::FunctionNoProto:
            case clang::Type::Enum:
                return true;

            default:
                return false;
            }
        }
    }

  public:
    /// Build a visitor that writes AST entries to `encoder`.
    ///
    /// `Context`, `encoder` and `sugared` are forwarded to the type encoder
    /// together with a pointer to this visitor. The file table starts with a
    /// single placeholder entry (empty name, default location), so index 0 is
    /// already taken; getExporterFileId returns 0 for invalid file ids.
    explicit TranslateASTVisitor(ASTContext *Context, CborEncoder *encoder,
                                 std::unordered_map<void *, QualType> *sugared,
                                 Preprocessor &PP)
        : Context(Context), typeEncoder(Context, encoder, sugared, this),
          encoder(encoder), PP(PP),
          files{{"", {}}} {}

    // Override the default behavior of the RecursiveASTVisitor: this visitor
    // always answers "yes" when asked whether implicit code should be visited.
    bool shouldVisitImplicitCode() const { return true; }

    // Return the filenames as a vector. Indices correspond to file IDs.
    const std::vector<std::pair<string, SourceLocation>> &getFiles() {
        // Iterate file include locations until fix point
        auto &manager = Context->getSourceManager();
        size_t size;
        do {
            size = files.size();
            /* Cannot use iterator over files here, as getExporterFileId
             * potentially modifies files. This also prevents use of
             * for (auto const &file : files) here. */
            for (size_t idx = 0; idx < size; idx++) {
                auto const &file = files[idx];
                getExporterFileId(manager.getFileID(file.second), false);
            }
        } while (size != files.size());
        return files;
    }

    void encodeMacros() {
        // Sort macros by source location
        std::vector<std::pair<MacroInfo *, MacroExpansionInfo>> macro_vec(
            macros.begin(), macros.end());
        std::sort(macro_vec.begin(), macro_vec.end(),
                  [](const std::pair<MacroInfo *, MacroExpansionInfo> &a,
                     const std::pair<MacroInfo *, MacroExpansionInfo> &b) {
                      return a.first->getDefinitionLoc() <
                             b.first->getDefinitionLoc();
                  });
        for (auto &I : macro_vec) {
            auto &Mac = I.first;
            auto &Info = I.second;
            auto Name = Info.Name;
            ASTEntryTag tag;
            if (Mac->isFunctionLike())
                tag = TagMacroFunctionDef;
            else
                tag = TagMacroObjectDef;

            std::vector<void *> childIds;
            auto range = SourceRange(Mac->getDefinitionLoc(), Mac->getDefinitionEndLoc());
            encode_entry_raw(Mac, tag, range, QualType(), false,
                             false, false, childIds, [Name](CborEncoder *local) {
                                 cbor_encode_string(local, Name.str());
                             });

        }
    }

    /// Write `loc` to `enc` as three unsigned integers: exporter file id,
    /// presumed line number and presumed column number.
    ///
    /// A location inside a macro argument or macro body expansion is replaced
    /// by its file location first. `isVaList` is forwarded to
    /// getExporterFileId.
    void encodeSourcePos(CborEncoder *enc, SourceLocation loc,
                         bool isVaList = false) {
        auto &srcMgr = Context->getSourceManager();

        const bool inMacroExpansion = srcMgr.isMacroArgExpansion(loc) ||
                                      srcMgr.isMacroBodyExpansion(loc);
        if (inMacroExpansion)
            loc = srcMgr.getFileLoc(loc);

        const auto fileId = getExporterFileId(srcMgr.getFileID(loc), isVaList);
        const auto lineNo = srcMgr.getPresumedLineNumber(loc);
        const auto columnNo = srcMgr.getPresumedColumnNumber(loc);

        cbor_encode_uint(enc, fileId);
        cbor_encode_uint(enc, lineNo);
        cbor_encode_uint(enc, columnNo);
    }

    /// Write the range `loc` to `enc` as five unsigned integers: exporter file
    /// id, begin line, begin column, end line, end column (all presumed
    /// positions).
    ///
    /// Each endpoint that lies inside a macro argument or macro body expansion
    /// is replaced by its file location first. The file id is taken from the
    /// begin location only.
    void encodeSourceSpan(CborEncoder *enc, SourceRange loc, bool isVaList = false) {
        auto &srcMgr = Context->getSourceManager();

        // Map a location that lives inside a macro expansion back to the file.
        const auto toFileLoc = [&srcMgr](SourceLocation where) {
            if (srcMgr.isMacroArgExpansion(where) ||
                srcMgr.isMacroBodyExpansion(where))
                return srcMgr.getFileLoc(where);
            return where;
        };

        const SourceLocation first = toFileLoc(loc.getBegin());
        const SourceLocation last = toFileLoc(loc.getEnd());

        const auto fileId = getExporterFileId(srcMgr.getFileID(first), isVaList);

        cbor_encode_uint(enc, fileId);
        cbor_encode_uint(enc, srcMgr.getPresumedLineNumber(first));
        cbor_encode_uint(enc, srcMgr.getPresumedColumnNumber(first));
        cbor_encode_uint(enc, srcMgr.getPresumedLineNumber(last));
        cbor_encode_uint(enc, srcMgr.getPresumedColumnNumber(last));
    }

    /// Map a clang FileID to the exporter's own file index, registering the
    /// file on first use.
    ///
    /// Invalid ids map to 0. For a new id the file's real path name is
    /// recorded together with its include location; when no file entry exists
    /// the name is "?", or "vararg" if `isVaList` is set.
    uint64_t getExporterFileId(FileID id, bool isVaList) {
        if (id.isInvalid())
            return 0;

        const auto known = file_id_mapping.find(id);
        if (known != file_id_mapping.end())
            return known->second;

        auto &srcMgr = Context->getSourceManager();
        auto entry = srcMgr.getFileEntryForID(id);

        string filename("?");
        if (entry)
            filename = entry->tryGetRealPathName().str();

        if (isVaList && filename == "?")
            filename = "vararg";

        // The new file takes the next free slot in the table.
        const auto assignedId = files.size();
        files.emplace_back(filename, srcMgr.getIncludeLoc(id));
        file_id_mapping[id] = assignedId;
        return assignedId;
    }

    //
    // Statements
    //

    /// Encode an attributed statement.
    /// Children: the wrapped sub-statement.
    /// Extras: the spelling of every attribute, one string each.
    bool VisitAttributedStmt(AttributedStmt *S) {
        const std::vector<void *> children{S->getSubStmt()};
        const auto emitAttrSpellings = [S](CborEncoder *extras) {
            for (auto attr : S->getAttrs()) {
                cbor_encode_text_stringz(extras, attr->getSpelling());
            }
        };
        encode_entry(S, TagAttributedStmt, children, emitAttrSpellings);
        return true;
    }

    /// Encode a compound statement.
    /// Children: every child statement, in order.
    /// Extras: (none)
    bool VisitCompoundStmt(CompoundStmt *CS) {
        std::vector<void *> children;
        for (Stmt *child : CS->children())
            children.push_back(child);

        encode_entry(CS, TagCompoundStmt, children);
        return true;
    }

    /// Encode a return statement.
    /// Children: whatever getRetValue() yields.
    /// Extras: (none)
    bool VisitReturnStmt(ReturnStmt *RS) {
        const std::vector<void *> children{RS->getRetValue()};
        encode_entry(RS, TagReturnStmt, children);
        return true;
    }

    /// Encode a do-while statement.
    /// Children: body, then condition.
    /// Extras: (none)
    bool VisitDoStmt(DoStmt *S) {
        const std::vector<void *> children{S->getBody(), S->getCond()};
        encode_entry(S, TagDoStmt, children);
        return true;
    }

    /// Encode a goto statement.
    /// Children: the label statement the goto's label refers to.
    /// Extras: (none)
    bool VisitGotoStmt(GotoStmt *GS) {
        const std::vector<void *> children{GS->getLabel()->getStmt()};
        encode_entry(GS, TagGotoStmt, children);
        return true;
    }

    /// Indirect gotos (GNU labels-as-values) are not supported: report an
    /// error diagnostic at the statement and abort the process.
    bool VisitIndirectGotoStmt(IndirectGotoStmt *IGS) {
        const std::string unsupported(
            "the GNU C labels-as-values extension is not supported. Aborting.");
        printDiag(Context, DiagnosticsEngine::Error, unsupported, IGS);
        abort();
    }

    /// Encode a static assertion declaration.
    /// Children: the asserted expression, followed by the message when one is
    /// present.
    /// Extras: (none)
    bool VisitStaticAssertDecl(StaticAssertDecl *SAD) {
        std::vector<void *> children{SAD->getAssertExpr()};
        if (auto *message = SAD->getMessage())
            children.push_back(message);

        // The type argument is unused for this kind of entry.
        encode_entry(SAD, TagStaticAssertDecl, children, QualType());
        return true;
    }

    /// Encode a label statement.
    /// Children: the labelled sub-statement.
    /// Extras: the label name.
    bool VisitLabelStmt(LabelStmt *LS) {
        const std::vector<void *> children{LS->getSubStmt()};
        const auto emitLabelName = [LS](CborEncoder *extras) {
            cbor_encode_text_stringz(extras, LS->getName());
        };
        encode_entry(LS, TagLabelStmt, children, emitLabelName);
        return true;
    }

    /// Encode a null statement; it has neither children nor extras.
    bool VisitNullStmt(NullStmt *NS) {
        const std::vector<void *> noChildren;
        encode_entry(NS, TagNullStmt, noChildren);
        return true;
    }

    /// Encode an if statement.
    /// Children: condition, then-branch, else-branch (always three slots,
    /// filled with whatever the accessors return).
    /// Extras: (none)
    bool VisitIfStmt(IfStmt *IS) {
        const std::vector<void *> children{
            IS->getCond(),
            IS->getThen(),
            IS->getElse(),
        };
        encode_entry(IS, TagIfStmt, children);
        return true;
    }

    /// Encode a for statement.
    /// Children: init, condition, increment, body (always four slots, filled
    /// with whatever the accessors return).
    /// Extras: (none)
    bool VisitForStmt(ForStmt *FS) {
        const std::vector<void *> children{
            FS->getInit(),
            FS->getCond(),
            FS->getInc(),
            FS->getBody(),
        };
        encode_entry(FS, TagForStmt, children);
        return true;
    }

    /// Encode a while statement.
    /// Children: condition, then body.
    /// Extras: (none)
    bool VisitWhileStmt(WhileStmt *WS) {
        const std::vector<void *> children{WS->getCond(), WS->getBody()};
        encode_entry(WS, TagWhileStmt, children);
        return true;
    }

    /// Encode a declaration statement.
    /// Children: the declarations that pass the filter below.
    /// Extras: (none)
    bool VisitDeclStmt(DeclStmt *DS) {
        LLVM_DEBUG(dbgs() << "Visit ");
        LLVM_DEBUG(DS->dumpColor());

        // Keep canonical decls, plus non-canonical VarDecls that are both
        // extern "C" and local variables. For more on the latter, see the
        // comment at the top of `VisitVarDecl`.
        std::vector<void *> children;
        for (auto it = DS->decl_begin(), end = DS->decl_end(); it != end; ++it) {
            Decl *decl = *it;

            bool keep = decl->isCanonicalDecl();
            if (!keep) {
                if (VarDecl *var = dyn_cast<VarDecl>(decl))
                    keep = var->isExternC() && var->isLocalVarDecl();
            }

            if (keep)
                children.push_back(decl);
        }

        encode_entry(DS, TagDeclStmt, children);
        return true;
    }

    /// Encode a break statement; it has neither children nor extras.
    bool VisitBreakStmt(BreakStmt *BS) {
        const std::vector<void *> noChildren;
        encode_entry(BS, TagBreakStmt, noChildren);
        return true;
    }

    /// Encode a continue statement; it has neither children nor extras.
    bool VisitContinueStmt(ContinueStmt *S) {
        const std::vector<void *> noChildren;
        encode_entry(S, TagContinueStmt, noChildren);
        return true;
    }

    /// Encode a case statement.
    /// Children: the case expression (LHS), then the sub-statement.
    /// Extras:
    /// - true if the evaluated case value is signed, false otherwise
    /// - the case value, as a signed or unsigned integer accordingly
    ///
    /// Aborts with an error diagnostic when the case expression cannot be
    /// evaluated to a constant integer.
    bool VisitCaseStmt(CaseStmt *CS) {
        auto *caseExpr = CS->getLHS();

        APSInt caseValue;
        const bool isConstant = evaluateConstantInt(caseExpr, caseValue);
        if (!isConstant) {
            std::string msg =
                "Expression in case statement is not an integer. Aborting.";
            printDiag(Context, DiagnosticsEngine::Error, msg, CS);
            abort();
        }

        const std::vector<void *> children{caseExpr, CS->getSubStmt()};
        const auto emitValue = [caseValue](CborEncoder *extras) {
            const bool isSigned = caseValue.isSigned();
            cbor_encode_boolean(extras, isSigned);
            if (isSigned)
                cbor_encode_int(extras, caseValue.getSExtValue());
            else
                cbor_encode_uint(extras, caseValue.getZExtValue());
        };
        encode_entry(CS, TagCaseStmt, children, emitValue);
        return true;
    }

    /// Encode a switch statement.
    /// Children: condition, then body.
    /// Extras: (none)
    bool VisitSwitchStmt(SwitchStmt *SS) {
        const std::vector<void *> children{SS->getCond(), SS->getBody()};
        encode_entry(SS, TagSwitchStmt, children);
        return true;
    }

    /// Encode a default statement.
    /// Children: the sub-statement.
    /// Extras: (none)
    bool VisitDefaultStmt(DefaultStmt *DS) {
        const std::vector<void *> children{DS->getSubStmt()};
        encode_entry(DS, TagDefaultStmt, children);
        return true;
    }

    // Encode ASM statements using the following encoding:
    // Child IDs: input expressions, followed by output expressions
    // Extras:
    //   Boolean true if volatile, false otherwise
    //   Assembly program fragment string
    //   List of input constraints
    //   List of output constraints
    //   List of clobbers
    //
    // The number of input and output expressions in the child id list will
    // match the length of the corresponding constraint arrays.
    //
    // Constraints are not emitted verbatim: each one is rebuilt from the
    // target's ConstraintInfo plus SimplifyConstraint (see the loops below).
    bool VisitGCCAsmStmt(GCCAsmStmt *E) {

        std::vector<void *> childIds;
        // Inputs go first, outputs second; the extras use the same order.
        copy(E->begin_inputs(), E->end_inputs(), std::back_inserter(childIds));
        copy(E->begin_outputs(), E->end_outputs(),
             std::back_inserter(childIds));

        encode_entry(E, TagAsmStmt, childIds, [E, this](CborEncoder *local) {
            cbor_encode_boolean(local, E->isVolatile());
            cbor_encode_string(local, E->generateAsmString(*Context));

            std::vector<std::string> outputs, inputs, clobbers;
            // Output infos are kept because validateInputConstraint below
            // takes them as an argument.
            std::vector<TargetInfo::ConstraintInfo> output_infos;
            for (unsigned i = 0, num = E->getNumOutputs(); i < num; ++i) {
                auto constraint = E->getOutputConstraint(i);
                std::string convertedConstraint;
                TargetInfo::ConstraintInfo info(constraint, E->getOutputName(i));
                this->Context->getTargetInfo().validateOutputConstraint(info);
                // Rebuild the modifier prefix from the validated info:
                // '+' for read-write or '=' otherwise, then '&' for early
                // clobber.
                convertedConstraint += info.isReadWrite() ? '+' : '=';
                if (info.earlyClobber()) {
                    convertedConstraint += '&';
                }
                if (info.allowsMemory() ||
                    !isScalarAsmType(E->getOutputExpr(i)->getType())) {
                    // This is a memory-only operand, so we need to make sure
                    // we pass it in by-address and not by-value (the value
                    // of the operand is actually the memory address to write
                    // into); clang does this conversion, but rustc doesn't
                    convertedConstraint += '*';
                }

                // The argument type passed to SimplifyConstraint differs by
                // clang version, hence the two spellings of the call.
#if CLANG_VERSION_MAJOR < 21
                convertedConstraint += SimplifyConstraint(constraint.str());
#else
                convertedConstraint += SimplifyConstraint(constraint);
#endif

                outputs.push_back(convertedConstraint);
                output_infos.push_back(std::move(info));
            }
            for (unsigned i = 0, num = E->getNumInputs(); i < num; ++i) {
                auto constraint = E->getInputConstraint(i);
                std::string convertedConstraint;
                TargetInfo::ConstraintInfo info(constraint, E->getInputName(i));
                this->Context->getTargetInfo().validateInputConstraint(output_infos, info);
                // Unlike outputs, an input only gets '*' when it allows
                // memory but not a register, or when its type is non-scalar.
                if ((info.allowsMemory() && !info.allowsRegister()) ||
                    !isScalarAsmType(E->getInputExpr(i)->getType())) {
                    // See above
                    convertedConstraint += '*';
                }
#if CLANG_VERSION_MAJOR < 21
                convertedConstraint += SimplifyConstraint(constraint.str());
#else
                convertedConstraint += SimplifyConstraint(constraint);
#endif
                inputs.emplace_back(convertedConstraint);
            }
            for (unsigned i = 0, num = E->getNumClobbers(); i < num; ++i) {
                auto clobber = E->getClobber(i);
                // "memory" and "cc" are passed through unchanged; every other
                // clobber is replaced by the target's normalized register
                // name.
                if (clobber != "memory" && clobber != "cc")
                    clobber = Context->getTargetInfo().getNormalizedGCCRegisterName(clobber);
                clobbers.emplace_back(clobber);
            }
            // Emission order: inputs, outputs, clobbers.
            cbor_encode_string_array(local, ArrayRef<std::string>(inputs));
            cbor_encode_string_array(local, ArrayRef<std::string>(outputs));
            cbor_encode_string_array(local, ArrayRef<std::string>(clobbers));
        });
        return true;
    }

    /// Rewrite an inline-asm constraint string character by character.
    ///
    /// - '=', '+', '*' and '&' are dropped
    /// - 'g' becomes "imr"
    /// - ',' becomes '|'
    /// - anything else is passed to the target's convertConstraint, which
    ///   receives the cursor by reference
    std::string SimplifyConstraint(const std::string &constraint) {
        std::string simplified;
        for (const char *cursor = constraint.c_str(); *cursor; ++cursor) {
            const char ch = *cursor;

            if (ch == '=' || ch == '+' || ch == '*' || ch == '&')
                continue;

            if (ch == 'g') {
                simplified += "imr";
                continue;
            }

            if (ch == ',') {
                simplified += '|';
                continue;
            }

            // TODO: handle more cases

            simplified +=
                this->Context->getTargetInfo().convertConstraint(cursor);
        }
        return simplified;
    }

    //
    // Expressions
    //

    // Recompute the per-expression macro state (curMacroExpansionStack and
    // curMacroExpansionSource) for E.
    //
    // Both are reset first. They are only filled in when E begins and ends
    // inside the same macro expansion; the stack then receives one entry per
    // expansion level that E covers completely and that VisitMacro accepts.
    // Always returns true.
    bool VisitExpr(Expr *E) {
        curMacroExpansionStack.clear();
        curMacroExpansionSource = StringRef();

        // We only translate constant macro objects to Rust consts, so this
        // expression must be constant.
        // if (!E->isConstantInitializer(*Context, false))
        //     return true;

        auto &Mgr = Context->getSourceManager();
        auto Range = E->getSourceRange();
        LLVM_DEBUG(dbgs() << "Checking expr for macro expansion: ");
        LLVM_DEBUG(E->dump());
        LLVM_DEBUG(Range.getBegin().dump(Mgr));
        LLVM_DEBUG(Range.getEnd().dump(Mgr));

        auto Begin = Range.getBegin();
        auto End = Range.getEnd();

        // Check that we are only expanding a single macro call.
        if (!Begin.isMacroID() || !End.isMacroID() ||
            Mgr.getImmediateMacroCallerLoc(Begin) != Mgr.getImmediateMacroCallerLoc(End))
            return true;

        // Record the source text of the immediate expansion range of Begin.
        // (Begin is necessarily a macro location here; the check above already
        // returned otherwise.)
        if (Begin.isMacroID()) {
#if CLANG_VERSION_MAJOR < 7
            // getImmediateExpansionRange in LLVM<7 returns a
            // std::pair<SourceLocation, SourceLocation>, which we need to
            // translate to a CharSourceRange for Lexer::getSourceText
            auto LocPair = Mgr.getImmediateExpansionRange(Begin);
            auto ExpansionRange = CharSourceRange::getCharRange(LocPair.first, LocPair.second);
#else // CLANG_VERSION_MAJOR >= 7
            auto ExpansionRange = Mgr.getImmediateExpansionRange(Begin);
#endif
            curMacroExpansionSource =
                Lexer::getSourceText(ExpansionRange, Mgr, Context->getLangOpts());
        }

        // The macro stack unwound by getImmediateMacroCallerLoc and friends
        // starts with the literal replacement and works its way to the macro
        // call that was replaced.
        while (Begin.isMacroID()) {
#if CLANG_VERSION_MAJOR < 7
            auto ExpansionRange = Mgr.getImmediateExpansionRange(Begin);
            auto ExpansionBegin = ExpansionRange.first;
            auto ExpansionEnd = ExpansionRange.second;
#else // CLANG_VERSION_MAJOR >= 7
            auto ExpansionRange = Mgr.getImmediateExpansionRange(Begin).getAsRange();
            auto ExpansionBegin = ExpansionRange.getBegin();
            auto ExpansionEnd = ExpansionRange.getEnd();
#endif
            StringRef name;
            MacroInfo *mac = getMacroInfo(ExpansionBegin, name);

            // Stop unwinding when the macro is unknown or has an empty
            // replacement list.
            if (!mac || mac->getNumTokens() == 0)
                return true;
            auto ReplacementBegin = mac->getReplacementToken(0).getLocation();
            auto ReplacementEnd = mac->getDefinitionEndLoc();
            // Verify that this expansion covers the entire macro replacement
            // definition, i.e. E is not a subexpression of the macro
            // replacement.
            if (Mgr.getSpellingLoc(Begin) != ReplacementBegin ||
                Mgr.getSpellingLoc(End) != ReplacementEnd)
                return true;

            // Move one level outwards: continue from the expansion site.
            Begin = ExpansionBegin;
            End = ExpansionEnd;

            if (VisitMacro(name, Begin, mac, E)) {
                curMacroExpansionStack.push_back(mac);
            }
        }
        return true;
    }


    /// Encode a va_arg expression.
    /// Children: the sub-expression.
    /// Extras: (none)
    bool VisitVAArgExpr(VAArgExpr *E) {
        const std::vector<void *> children{E->getSubExpr()};
        encode_entry(E, TagVAArgExpr, children);
        return true;
    }

    /// Generic selection expressions are not exported: emit a warning
    /// diagnostic at the expression and carry on.
    bool VisitGenericSelectionExpr(GenericSelectionExpr *E) {
        printDiag(Context, DiagnosticsEngine::Warning,
                  "Encountered unsupported generic selection expression",
                  E);
        return true;
    }

    /// Encode a `sizeof` / `alignof`-style expression.
    ///
    /// Children: the operand expression, or a null slot when the operand is a
    /// type.
    /// Extras:
    /// - operator name: "sizeof", "alignof", "vecstep",
    ///   "openmprequiredsimdalign" or "preferredalignof"
    /// - exporter id of the operand type
    ///
    /// Any other operator kind is reported as an error and aborts. The operand
    /// type is handed to the type encoder afterwards so it gets visited.
    bool VisitUnaryExprOrTypeTraitExpr(UnaryExprOrTypeTraitExpr *E) {
        std::vector<void *> childIds{
            E->isArgumentType() ? nullptr : E->getArgumentExpr()};
        auto t = E->getTypeOfArgument();
        auto qt = typeEncoder.encodeQualType(t);
        encode_entry(
            E, TagUnaryExprOrTypeTraitExpr, childIds,
            [E, t, qt, this](CborEncoder *extras) {
                switch (E->getKind()) {
                case UETT_SizeOf:
                    cbor_encode_text_stringz(extras, "sizeof");
                    break;
                case UETT_AlignOf:
                    cbor_encode_text_stringz(extras, "alignof");
                    break;
                case UETT_VecStep:
                    cbor_encode_text_stringz(extras, "vecstep");
                    break;
                case UETT_OpenMPRequiredSimdAlign:
                    cbor_encode_text_stringz(extras, "openmprequiredsimdalign");
                    break;
#if CLANG_VERSION_MAJOR >= 8
                case UETT_PreferredAlignOf: {
                    // This is GCC's `__alignof` intrinsic. To match its
                    // behavior, we only want to use preferred alignment if
                    // we're dealing with a double, long long, or unsigned
                    // long long. Otherwise, we should use the ABI-specified
                    // alignment. See ASTContext::getPreferredTypeAlign
                    // (clang/lib/AST/ASTContext.cpp:2215) for more
                    // details. We replicate the type classification here;
                    // only the emitted operator name depends on it.

                    // Look through arrays to the base element type.
                    const clang::Type *T =
                        t.getTypePtr()->getBaseElementTypeUnsafe();
                    // Double and long long should be naturally aligned if
                    // possible. Complex and enum types are classified by
                    // their element / underlying integer type.
                    if (const auto *CT = T->getAs<ComplexType>())
                        T = CT->getElementType().getTypePtr();
                    if (const auto *ET = T->getAs<EnumType>())
                        T = ET->getDecl()->getIntegerType().getTypePtr();
                    if (T->isSpecificBuiltinType(BuiltinType::Double) ||
                        T->isSpecificBuiltinType(BuiltinType::LongLong) ||
                        T->isSpecificBuiltinType(BuiltinType::ULongLong))
                        cbor_encode_text_stringz(extras, "preferredalignof");
                    else
                        cbor_encode_text_stringz(extras, "alignof");
                    break;
                }
#endif // CLANG_VERSION_MAJOR
                default:
                    printDiag(Context, DiagnosticsEngine::Error, "Could not match UnaryExprOrTypeTrait", E);
                    abort();
                }
                cbor_encode_uint(extras, qt);
            });
        typeEncoder.VisitQualTypeOf(t, E);
        return true;
    }

    /// Encode a statement expression.
    /// Children: the sub-statement.
    /// Extras: (none)
    bool VisitStmtExpr(StmtExpr *E) {
        const std::vector<void *> children{E->getSubStmt()};
        encode_entry(E, TagStmtExpr, children);
        return true;
    }

    /// Encode an `offsetof` expression.
    ///
    /// Children: (none)
    /// Extras, when the expression is an integer constant expression:
    /// - the offset value
    /// Extras otherwise (e.g. `offsetof(S, field[idx])`):
    /// - null
    /// - exporter id of the type named in the offsetof
    /// - id of the canonical field declaration
    /// - id of the array index expression
    ///
    /// The non-constant form is only supported for exactly two components, a
    /// field followed by an array index; this is checked with asserts.
    bool VisitOffsetOfExpr(OffsetOfExpr *E) {
        std::vector<void *> childIds;

        auto value = getIntegerConstantExpr(*E, *this->Context);

        encode_entry(
            E, TagOffsetOfExpr, childIds, [this, E, value](CborEncoder *extras) {
                if (value) {
                    cbor_encode_uint(extras, value->getZExtValue());
                } else {
                    // It's possible to get a non ICE in a field array like so:
                    // offsetof(S, field[idx]) so here we are encoding the type,
                    // field, and array input expr
                    auto ty = E->getTypeSourceInfo()->getType();
                    auto qt = typeEncoder.encodeQualType(ty);

                    assert(E->getNumComponents() == 2 &&
                           "Found unsupported number of offsetof components");

                    auto component0 = E->getComponent(0);
                    auto component1 = E->getComponent(1);

                    // Expect `.field` followed by `[index]`.
                    assert(component0.getKind() == OffsetOfNode::Field &&
                           "Found unsupported offsetof component kind");
                    assert(component1.getKind() == OffsetOfNode::Array &&
                           "Found unsupported offsetof component kind");

                    auto field = component0.getField()->getCanonicalDecl();
                    auto expr0 = E->getIndexExpr(0);

                    cbor_encode_null(extras);
                    cbor_encode_uint(extras, qt);
                    cbor_encode_uint(extras, uintptr_t(field));
                    cbor_encode_uint(extras, uintptr_t(expr0));
                }
            });

        // If this is the only use of the struct type, we need to ensure that it
        // gets visited.
        if (!value) {
            auto ty = E->getTypeSourceInfo()->getType();
            typeEncoder.VisitQualTypeOf(ty, E);
        }

        return true;
    }

    /// Encode a parenthesized expression.
    /// Children: the inner expression.
    /// Extras: (none)
    bool VisitParenExpr(ParenExpr *E) {
        const std::vector<void *> children{E->getSubExpr()};
        encode_entry(E, TagParenExpr, children);
        return true;
    }

    /*
     [C99 6.5.2.3] Structure and Union Members.
     Children:
     - base expression
     - canonical declaration of the accessed member
     Extras:
     - true: is arrow; false: is dot
     */
    bool VisitMemberExpr(MemberExpr *E) {
        auto *memberDecl = E->getMemberDecl()->getCanonicalDecl();
        const std::vector<void *> children{E->getBase(), memberDecl};
        const auto emitIsArrow = [E](CborEncoder *extras) {
            cbor_encode_boolean(extras, E->isArrow());
        };
        encode_entry(E, TagMemberExpr, children, emitIsArrow);
        return true;
    }

    /*
     [C99 6.5.2.5] Compound literal expression
     Children:
     - initializer expression
     Extras: (none)
     */
    bool VisitCompoundLiteralExpr(CompoundLiteralExpr *E) {
        const std::vector<void *> children{E->getInitializer()};
        encode_entry(E, TagCompoundLiteralExpr, children);
        return true;
    }

    /// Vector element expressions are not exported: emit a warning diagnostic
    /// at the expression and carry on.
    bool VisitExtVectorElementExpr(ExtVectorElementExpr *E) {
        printDiag(Context, DiagnosticsEngine::Warning,
                  "Encountered unsupported vector element expression",
                  E);
        return true;
    }

    /*
     Describes a C initializer list
     Children: the initializer expressions
     Extras:
     - id of the field initialized in a union, or null
     - id of the syntactic form of the list, or null
     */
    bool VisitInitListExpr(InitListExpr *ILE) {
        const auto initExprs = ILE->inits();
        const std::vector<void *> children(initExprs.begin(), initExprs.end());

        const auto emitExtras = [ILE](CborEncoder *extras) {
            if (auto unionField = ILE->getInitializedFieldInUnion())
                cbor_encode_uint(extras, uintptr_t(unionField));
            else
                cbor_encode_null(extras);

            if (auto syntacticForm = ILE->getSyntacticForm())
                cbor_encode_uint(extras, uintptr_t(syntacticForm));
            else
                cbor_encode_null(extras);
        };
        encode_entry(ILE, TagInitListExpr, children, emitExtras);

        return true;
    }

    /*
     Describes a designated initializer expression
     Children: initializer
     Extras:
     - Array of designators, one entry per designator, in order

     Designator format (each entry is itself an array whose first element is
     the kind tag):
     [1, array_index]            { [1]      = 2 }
     [2, field_id]               { .field   = 1 }
     [3, array_start, array_end] { [1 .. 2] = 3 }

     Array indices and range bounds must be integer constant expressions;
     this is only checked with asserts.
     */
    bool VisitDesignatedInitExpr(DesignatedInitExpr *E) {
        std::vector<void *> childIds{E->getInit()};

        encode_entry(
            E, TagDesignatedInitExpr, childIds, [this, E](CborEncoder *extras) {
                CborEncoder array;
                cbor_encoder_create_array(extras, &array,
                                          E->designators().size());
                for (auto &designator : E->designators()) {
                    // `entry` is opened by exactly one of the branches below
                    // and closed after the chain.
                    CborEncoder entry;
                    if (designator.isArrayDesignator()) {
                        cbor_encoder_create_array(&array, &entry, 2);
                        cbor_encode_int(&entry, 1);

                        auto constant = getIntegerConstantExpr(
                            *E->getArrayIndex(designator), *Context);

                        // NOTE(review): with NDEBUG this assert disappears and
                        // `constant` is dereferenced unchecked.
                        assert(
                            constant &&
                            "designator array index not integer constant expr");
                        cbor_encode_int(&entry, constant->getZExtValue());

                    } else if (designator.isFieldDesignator()) {
                        cbor_encoder_create_array(&array, &entry, 2);
                        cbor_encode_int(&entry, 2);
                        // The accessor for the designated field is named
                        // differently from clang 17 on.
                        cbor_encode_uint(&entry,
#if CLANG_VERSION_MAJOR < 17
                                         uintptr_t(designator.getField()));
#else
                                         uintptr_t(designator.getFieldDecl()));
#endif // CLANG_VERSION_MAJOR
                    } else if (designator.isArrayRangeDesignator()) {
                        cbor_encoder_create_array(&array, &entry, 3);
                        cbor_encode_int(&entry, 3);

                        auto constant = getIntegerConstantExpr(
                            *E->getArrayRangeStart(designator), *Context);
                        assert(constant && "designator array range start not "
                                          "integer constant expr");
                        cbor_encode_int(&entry, constant->getZExtValue());

                        constant = getIntegerConstantExpr(
                            *E->getArrayRangeEnd(designator), *Context);
                        assert(constant && "designator array range end not "
                                          "integer constant expr");
                        cbor_encode_int(&entry, constant->getZExtValue());
                    } else {
                        // NOTE(review): with NDEBUG control would fall through
                        // to the close below with `entry` never opened.
                        assert(0 && "unknown designator kind");
                    }
                    cbor_encoder_close_container(&array, &entry);
                }
                cbor_encoder_close_container(extras, &array);
            });

        return true;
    }

    // Designated init update expressions are not exported; a warning
    // diagnostic is reported for the node instead.
    bool VisitDesignatedInitUpdateExpr(DesignatedInitUpdateExpr *E) {
        printDiag(Context, DiagnosticsEngine::Warning,
                  "Encountered unsupported designated init update expression",
                  E);
        return true;
    }

    // Exports a predefined expression; its single child is whatever
    // getFunctionName() returns for it.
    bool VisitPredefinedExpr(PredefinedExpr *E) {
        std::vector<void *> childIds;
        childIds.push_back(E->getFunctionName());

        encode_entry(E, TagPredefinedExpr, childIds);
        return true;
    }

    // Exports an implicit value initialization; the entry has no children
    // and no extras.
    bool VisitImplicitValueInitExpr(ImplicitValueInitExpr *E) {
        std::vector<void *> noChildren;
        encode_entry(E, TagImplicitValueInitExpr, noChildren);
        return true;
    }

    // Paren list expressions are not exported; a warning diagnostic is
    // reported for the node instead.
    bool VisitParenListExpr(ParenListExpr *E) {
        printDiag(Context, DiagnosticsEngine::Warning,
                  "Encountered unsupported paren list expression", E);
        return true;
    }

    // Exports an implicit cast. The child is the operand; the extra is the
    // cast kind name, replaced by "ConstCast" for pointer casts that only add
    // `const` to an otherwise identical pointee type.
    bool VisitImplicitCastExpr(ImplicitCastExpr *ICE) {
        std::vector<void *> childIds;
        childIds.push_back(ICE->getSubExpr());

        auto encodeCastKind = [ICE](CborEncoder *extras) {
            // The cast kind under which a const-adding pointer cast shows up
            // depends on the clang version.
#if CLANG_VERSION_MAJOR < 8
            const auto constCastCarrier = CastKind::CK_BitCast;
#else
            // Incompatible const qualifier pointer casts are now NoOp casts if
            // they are in the same namespace. See
            // Sema::CheckAssignmentConstraints (SemaExpr.cpp:7951)
            const auto constCastCarrier = CastKind::CK_NoOp;
#endif // CLANG_VERSION_MAJOR

            auto kindName = ICE->getCastKindName();

            if (ICE->getCastKind() == constCastCarrier) {
                auto *fromPtr = dyn_cast_or_null<clang::PointerType>(
                    ICE->getSubExpr()->getType().getTypePtrOrNull());
                auto *toPtr = dyn_cast_or_null<clang::PointerType>(
                    ICE->getType().getTypePtrOrNull());

                // Only pointer-to-pointer casts can be const casts.
                if (fromPtr && toPtr) {
                    auto fromPointee = fromPtr->getPointeeType();
                    auto toPointee = toPtr->getPointeeType();

                    if (toPointee.isConstQualified() &&
                        fromPointee->getUnqualifiedDesugaredType() ==
                            toPointee->getUnqualifiedDesugaredType()) {
                        kindName = "ConstCast";
                    }
                }
            }

            cbor_encode_text_stringz(extras, kindName);
        };

        encode_entry(ICE, TagImplicitCastExpr, childIds, encodeCastKind);
        return true;
    }

    // Exports a C-style cast. Children: the operand and, for casts to a
    // union, the union field whose type matches the operand (a null id if no
    // field matches). Extra: the cast kind name.
    bool VisitCStyleCastExpr(CStyleCastExpr *E) {
        std::vector<void *> childIds;
        childIds.push_back(E->getSubExpr());

        if (E->getCastKind() == CastKind::CK_ToUnion) {
            auto operandType =
                E->getSubExpr()->getType()->getUnqualifiedDesugaredType();

            // Pick the first field whose desugared, unqualified type is the
            // operand's type.
            FieldDecl *matchingField = nullptr;
            for (auto *candidate :
                 E->getType()->getAsUnionType()->getDecl()->fields()) {
                if (candidate->getType()->getUnqualifiedDesugaredType() ==
                    operandType) {
                    matchingField = candidate;
                    break;
                }
            }

            childIds.push_back(matchingField);
        }

        encode_entry(E, TagCStyleCastExpr, childIds, [E](CborEncoder *extras) {
            cbor_encode_text_stringz(extras, E->getCastKindName());
        });
        return true;
    }

    // Exports a unary operator. Child: the operand. Extras: the operator
    // spelling and whether the operator is a prefix operator.
    bool VisitUnaryOperator(UnaryOperator *UO) {
        std::vector<void *> childIds;
        childIds.push_back(UO->getSubExpr());

        auto encodeOperator = [UO](CborEncoder *extras) {
            const std::string spelling =
                UO->getOpcodeStr(UO->getOpcode()).str();
            cbor_encode_string(extras, spelling);
            cbor_encode_boolean(extras, UO->isPrefix());
        };

        encode_entry(UO, TagUnaryOperator, childIds, encodeOperator);
        return true;
    }

    // Exports a binary operator. Children: LHS and RHS. Extras: the operator
    // spelling followed by the computation LHS/result types, which stay
    // default-constructed unless the node is a compound assignment.
    bool VisitBinaryOperator(BinaryOperator *BO) {
        std::vector<void *> childIds;
        childIds.push_back(BO->getLHS());
        childIds.push_back(BO->getRHS());

        QualType lhsComputeType;
        QualType resultComputeType;

        if (auto compound = dyn_cast_or_null<CompoundAssignOperator>(BO)) {
            lhsComputeType = compound->getComputationLHSType();
            resultComputeType = compound->getComputationResultType();

            // Make sure both computation types get exported as well.
            typeEncoder.VisitQualTypeOf(lhsComputeType, compound);
            typeEncoder.VisitQualTypeOf(resultComputeType, compound);
        }

        auto encodeOperator = [this, BO, lhsComputeType,
                               resultComputeType](CborEncoder *extras) {
            cbor_encode_string(extras, BO->getOpcodeStr().str());
            encode_qualtype(extras, lhsComputeType);
            encode_qualtype(extras, resultComputeType);
        };

        encode_entry(BO, TagBinaryOperator, childIds, encodeOperator);
        return true;
    }

    // Exports `cond ? a : b`. Children, in order: condition, true expression,
    // false expression.
    bool VisitConditionalOperator(ConditionalOperator *CO) {
        std::vector<void *> childIds;
        childIds.push_back(CO->getCond());
        childIds.push_back(CO->getTrueExpr());
        childIds.push_back(CO->getFalseExpr());

        encode_entry(CO, TagConditionalOperator, childIds);
        return true;
    }

    // Exports a binary conditional operator. Children, in order: the common
    // expression and the false expression.
    bool VisitBinaryConditionalOperator(BinaryConditionalOperator *CO) {
        std::vector<void *> childIds;
        childIds.push_back(CO->getCommon());
        childIds.push_back(CO->getFalseExpr());

        encode_entry(CO, TagBinaryConditionalOperator, childIds);
        return true;
    }

    // Exports a reference to a declaration. The single child is the canonical
    // declaration being referenced, which is also traversed from here.
    bool VisitDeclRefExpr(DeclRefExpr *DRE) {
        // Already exported: stop here. Without this check the TraverseDecl
        // call at the end can lead to infinite recursion.
        if (isExported(DRE, TagDeclRefExpr)) {
            return true;
        }

        LLVM_DEBUG({
            dbgs() << "Visiting ";
            DRE->dumpColor();
            DRE->getDecl()->getType()->dump();
            DRE->getType()->dump();
        });

        auto *canonical = DRE->getDecl()->getCanonicalDecl();

        std::vector<void *> childIds;
        childIds.push_back(canonical);
        encode_entry(DRE, TagDeclRefExpr, childIds);

        // Uses of undeclared declarations might never be traversed unless we
        // traverse them manually at this point.
        TraverseDecl(canonical);

        return true;
    }

    // Exports a call. Children: the callee followed by every argument in
    // call order.
    bool VisitCallExpr(CallExpr *CE) {
        std::vector<void *> childIds;
        childIds.push_back(CE->getCallee());
        for (auto *argument : CE->arguments()) {
            childIds.push_back(argument);
        }

        encode_entry(CE, TagCallExpr, childIds);
        return true;
    }

    // Exports an array subscript. Children, in order: LHS and RHS of the
    // subscript expression.
    bool VisitArraySubscriptExpr(ArraySubscriptExpr *E) {
        std::vector<void *> childIds;
        childIds.push_back(E->getLHS());
        childIds.push_back(E->getRHS());

        encode_entry(E, TagArraySubscriptExpr, childIds);
        return true;
    }

    // Exports a shuffle-vector expression with all of its AST children, in
    // the order children() yields them.
    bool VisitShuffleVectorExpr(ShuffleVectorExpr *E) {
        std::vector<void *> childIds;
        for (auto *child : E->children()) {
            childIds.push_back(child);
        }

        encode_entry(E, TagShuffleVectorExpr, childIds);
        return true;
    }

    // Exports a convert-vector expression with all of its AST children, in
    // the order children() yields them.
    bool VisitConvertVectorExpr(ConvertVectorExpr *E) {
        std::vector<void *> childIds;
        for (auto *child : E->children()) {
            childIds.push_back(child);
        }

        encode_entry(E, TagConvertVectorExpr, childIds);
        return true;
    }

#if CLANG_VERSION_MAJOR >= 9
    // Exports a builtin bit-cast expression with all of its AST children, in
    // the order children() yields them.
    bool VisitBuiltinBitCastExpr(BuiltinBitCastExpr *E) {
        std::vector<void *> childIds;
        for (auto *child : E->children()) {
            childIds.push_back(child);
        }

        encode_entry(E, TagBuiltinBitCastExpr, childIds);
        return true;
    }
#endif

    // Exports a materialize-temporary expression with all of its AST
    // children, in the order children() yields them.
    bool VisitMaterializeTemporaryExpr(MaterializeTemporaryExpr *E) {
        std::vector<void *> childIds;
        for (auto *child : E->children()) {
            childIds.push_back(child);
        }

        encode_entry(E, TagMaterializeTemporaryExpr, childIds);
        return true;
    }
    // Exports an expression-with-cleanups node with all of its AST children,
    // in the order children() yields them.
    bool VisitExprWithCleanups(ExprWithCleanups *E) {
        std::vector<void *> childIds;
        for (auto *child : E->children()) {
            childIds.push_back(child);
        }

        encode_entry(E, TagExprWithCleanups, childIds);
        return true;
    }

#if CLANG_VERSION_MAJOR >= 8
    // Exports a constant expression. Children: all AST children. Extras: a
    // flag telling whether an integer value was obtained and, if so, the
    // value's signedness followed by the value itself.
    bool VisitConstantExpr(ConstantExpr *E) {
        std::vector<void *> childIds;
        for (auto *child : E->children()) {
            childIds.push_back(child);
        }

        APSInt value;
        const bool hasValue = evaluateConstantInt(E, value);

        auto encodeValue = [hasValue, value](CborEncoder *extras) {
            cbor_encode_boolean(extras, hasValue);
            if (!hasValue) {
                return;
            }

            const bool isSigned = value.isSigned();
            cbor_encode_boolean(extras, isSigned);
            if (isSigned) {
                cbor_encode_int(extras, value.getSExtValue());
            } else {
                cbor_encode_uint(extras, value.getZExtValue());
            }
        };

        encode_entry(E, TagConstantExpr, childIds, encodeValue);
        return true;
    }
#endif // CLANG_VERSION_MAJOR

    // Exports an atomic expression. Children: all AST children of the node.
    // Extra: the name of the atomic builtin as a string. If the operation is
    // not matched by any generated case, an error diagnostic is reported and
    // no extra is written.
    bool VisitAtomicExpr(AtomicExpr *E) {
        auto children = E->children();
        std::vector<void *> childIds(std::begin(children), std::end(children));
        encode_entry(E, TagAtomicExpr, childIds,
                     [E, this](CborEncoder *array) {
                         switch (E->getOp()) {
// The switch cases are generated from clang's builtin table: BUILTIN is
// defined to expand to nothing, while each ATOMIC_BUILTIN entry becomes a
// `case AtomicExpr::AO<ID>` that encodes the stringified ID.
// NOTE(review): presumably the included table file #undefs these macros
// again -- confirm against the clang version in use.
#define BUILTIN(ID, TYPE, ATTRS)
#define ATOMIC_BUILTIN(ID, TYPE, ATTRS) \
                             case AtomicExpr::AO ## ID:                 \
                                 cbor_encode_string(array, #ID);       \
                                 break;
// The table is included from a different file depending on the clang version.
#if CLANG_VERSION_MAJOR >= 19
#include "clang/Basic/Builtins.inc"
#else
#include "clang/Basic/Builtins.def"
#endif
                         // Operation not covered by the generated cases.
                         default: printDiag(Context, DiagnosticsEngine::Error, "Unknown atomic builtin: " +
                                             std::to_string(E->getOp()), E);
                         };
                     });
        return true;
    }

    // GNU address-of-label expressions are not exported; a warning
    // diagnostic is reported for the node instead.
    bool VisitAddrLabelExpr(AddrLabelExpr *E) {
        printDiag(Context, DiagnosticsEngine::Warning,
                  "Cannot translate GNU address of label expression", E);
        return true;
    }

    // Exports a choose expression. Children: all AST children. Extra: the
    // result of isConditionTrue() as a boolean.
    bool VisitChooseExpr(ChooseExpr *E) {
        std::vector<void *> childIds;
        for (auto *child : E->children()) {
            childIds.push_back(child);
        }

        auto encodeCondition = [E](CborEncoder *extras) {
            cbor_encode_boolean(extras, E->isConditionTrue());
        };

        encode_entry(E, TagChooseExpr, childIds, encodeCondition);
        return true;
    }

    // GNU null expressions are not exported; a warning diagnostic is
    // reported for the node instead.
    bool VisitGNUNullExpr(GNUNullExpr *E) {
        printDiag(Context, DiagnosticsEngine::Warning,
                  "Encountered unsupported GNU extension: null expression",
                  E);
        return true;
    }


    //
    // Declarations
    //

    // Some function declarations are also function definitions.
    // This method handles both types of declarations.
    bool VisitFunctionDecl(FunctionDecl *FD) {
        // Non-canonical redeclarations are exported as lightweight
        // TagNonCanonicalDecl entries pointing at the canonical declaration.
        if (!FD->isCanonicalDecl()) {
            // Emit non-canonical decl so we have a placeholder to attach comments to
            std::vector<void *> childIds = {FD->getCanonicalDecl()};
            auto span = FD->getSourceRange();
            // When this redeclaration carries the body, the placeholder uses
            // the canonical declaration's source range instead of its own.
            if (FD->doesThisDeclarationHaveABody())
                span = FD->getCanonicalDecl()->getSourceRange();
            encode_entry(FD, TagNonCanonicalDecl, span, childIds, FD->getType());
            typeEncoder.VisitQualTypeOf(FD->getType(), FD);
            return true;
        }

        // if (FD->hasBody() && FD->isVariadic()) {
        //   //   auto fname = FD->getNameString();
        //     printDiag(Context, DiagnosticsEngine::Warning, "variadic functions are not fully supported.", FD);
        // }

        // Use the parameters from the function declaration
        // that defines the body, if one exists.
        const FunctionDecl *paramsFD = FD;
        auto body =
            FD->getBody(paramsFD); // replaces its argument if body exists

        // Avoid getting params from an implicit decl if a subsequent non-implicit decl exists.
        // Implicit decls will not have names for params, but more importantly, they will never
        // reference header-declared typedefs, so we would miss the fact that e.g. malloc is
        // declared to accept `size_t` in its stdlib.h declaration, while its implicit declaration
        // accepts the built-in `unsigned long`.
        if (FD->isImplicit()) {
            paramsFD = FD->getMostRecentDecl();
        }

        // Children: the canonical declaration of every parameter (each one is
        // also traversed here), followed by the body, which may be null.
        std::vector<void *> childIds;
        for (auto x : paramsFD->parameters()) {
            auto cd = x->getCanonicalDecl();
            childIds.push_back(cd);
            TraverseDecl(cd);
        }

        childIds.push_back(body);

        // We prefer non-implicit decls for their type information.
        auto functionType = paramsFD->getType();
        auto span = paramsFD->getSourceRange();
        // Extras, in order: name, is_global, is_inline, is_main,
        // builtin-without-header flag, is_extern,
        // is_inline_externally_visible, then an indefinite-length array of
        // attribute information.
        encode_entry(
            FD, TagFunctionDecl, span, childIds, functionType,
            [this, FD](CborEncoder *array) {
                auto name = FD->getNameAsString();
                cbor_encode_string(array, name);

                auto is_global = FD->isGlobal();
                cbor_encode_boolean(array, is_global);

                // Inline-ness is taken from the definition; a function without
                // a definition is reported as not inline.
                auto def = FD->getDefinition();
                bool is_inline = def && def->isInlineSpecified();
                cbor_encode_boolean(array, is_inline);

                auto is_main = FD->isMain();
                cbor_encode_boolean(array, is_main);

                // True when the function has a builtin ID and the builtin
                // table reports no header name for it.
                auto bid = FD->getBuiltinID();
                cbor_encode_boolean(
                    array, bid && !Context->BuiltinInfo.getHeaderName(bid));

                bool is_extern = FD->getStorageClass() == SC_Extern;
                cbor_encode_boolean(array, is_extern);

                // The rules for when inlined functions are externally visible
                // are complex, so we export the visibility computed by clang.
                // The query is only issued when the guard conditions below
                // hold; otherwise `false` is exported.
                bool can_query_inline_visibility = is_inline &&
                    (FD->doesThisDeclarationHaveABody() ||
                     FD->willHaveBody() ||
                     FD->hasAttr<AliasAttr>());
                bool is_inline_externally_visible = can_query_inline_visibility
                    && FD->isInlineDefinitionExternallyVisible();
                cbor_encode_boolean(array, is_inline_externally_visible);

                // Encode attribute names and relevant info if supported
                // Attributes are read from the definition when there is one,
                // otherwise from FD itself.
                CborEncoder attr_info;
                bool has_attrs = def ? def->hasAttrs() : FD->hasAttrs();

                cbor_encoder_create_array(array, &attr_info,
                                          CborIndefiniteLength);

                if (has_attrs) {
                    auto attrs = def ? def->getAttrs() : FD->getAttrs();

                    for (auto attr : attrs) {
                        // Every attribute contributes its spelling; alias and
                        // visibility attributes add one more string (the
                        // aliasee / the visibility kind).
                        cbor_encode_text_stringz(&attr_info,
                                                 attr->getSpelling());

                        if (auto *aa = dyn_cast<AliasAttr>(attr)) {
                            cbor_encode_text_stringz(
                                &attr_info, aa->getAliasee().str().c_str());
                        } else if (auto *va = dyn_cast<VisibilityAttr>(attr)) {
                            const char *vis = VisibilityAttr::ConvertVisibilityTypeToStr(va->getVisibility());
                            cbor_encode_text_stringz(&attr_info, vis);
                        }
                    }
                }

                cbor_encoder_close_container(array, &attr_info);
            });
        // Make sure the function type itself gets exported.
        typeEncoder.VisitQualTypeOf(functionType, paramsFD);

        return true;
    }

    /* I don't think this adds anything that we don't get from VarDecl
    bool VisitParmVarDecl(ParmVarDecl *PVD)
    {
        std::vector<void*> childIds = { PVD->getDefinition() };
        encode_entry_extra(encoder, PVD, TagParmVarDecl, childIds,
                           [PVD](CborEncoder *array){
                               auto name = PVD->getNameAsString();
                               cbor_encode_text_stringz(array, name.c_str());
                           });
        return true;
    }*/

    // Exports a variable declaration.
    // Children: the initializer, unless the variable is an externally visible
    // non-definition.
    // Extras, in order: name, has static storage duration, has thread storage
    // duration, is externally visible, is a definition, then an
    // indefinite-length array of attribute information.
    bool VisitVarDecl(VarDecl *VD) {
        // Skip non-canonical decls, as long as they aren't 'extern'.
        // Unfortunately, if there are two 'extern' variables in different
        // functions that should be the same at link time, Clang groups them.
        // That is unhelpful for us though, since we need to convert them into
        // two separate `extern` blocks.
        if (!VD->isCanonicalDecl() && !VD->isExternC()) {
            // Emit non-canonical decl so we have a placeholder to attach comments to
            std::vector<void *> childIds = {VD->getCanonicalDecl()};
            encode_entry(VD, TagNonCanonicalDecl, VD->getLocation(), childIds, VD->getType());
            typeEncoder.VisitQualTypeOf(VD->getType(), VD);
            return true;
        }

        auto is_defn = false;
        auto def = VD;
        // Focus on the definition for a particular canonical declaration.
        // If several redeclarations qualify, the last one visited wins.
        for (auto x : VD->redecls()) {
            if (!x->hasExternalStorage() || x->getInit()) {
                is_defn = true;
                def = x;
            }
        }

        std::vector<void *> childIds{};

        // A local var def should allow for the possibility of no initializer
        // and be marked as not a definition
        if (VD->isExternC() && VD->isLocalVarDecl()) {
            is_defn = false;
        }

        auto is_externally_visible = VD->isExternallyVisible();

        // Non static (externally visible) non definitions shouldn't receive an initializer,
        // otherwise get one
        if (!(is_externally_visible && !is_defn)) {
            childIds.push_back((void *)VD->getAnyInitializer());
        }

        // Use the type from the definition in case the extern was an incomplete
        // type
        auto T = def->getType();

        auto loc = is_defn ? def->getLocation() : VD->getLocation();

        encode_entry(
            VD, TagVarDecl, loc, childIds, T,
            [VD, is_defn, def, is_externally_visible](CborEncoder *array) {
                auto name = VD->getNameAsString();
                cbor_encode_string(array, name);

                auto has_static_duration =
                    VD->getStorageDuration() == SD_Static;
                cbor_encode_boolean(array, has_static_duration);

                auto has_thread_duration =
                    VD->getStorageDuration() == SD_Thread;
                cbor_encode_boolean(array, has_thread_duration);
                cbor_encode_boolean(array, is_externally_visible);
                cbor_encode_boolean(array, is_defn);

                // Encode attribute names and relevant info if supported
                CborEncoder attr_info;

                cbor_encoder_create_array(array, &attr_info,
                                          CborIndefiniteLength);

                // `def` starts out as VD and is only ever replaced by one of
                // its redeclarations, so it is never null here and can be
                // queried directly.
                if (def->hasAttrs()) {
                    for (auto attr : def->attrs()) {
                        // Every attribute contributes its spelling; section
                        // and alias attributes add one more string (the
                        // section name / the aliasee).
                        cbor_encode_text_stringz(&attr_info,
                                                 attr->getSpelling());

                        if (auto *sa = dyn_cast<SectionAttr>(attr)) {
                            cbor_encode_text_stringz(
                                &attr_info, sa->getName().str().c_str());
                        } else if (auto *aa = dyn_cast<AliasAttr>(attr)) {
                            cbor_encode_text_stringz(
                                &attr_info, aa->getAliasee().str().c_str());
                        }
                    }
                }

                cbor_encoder_close_container(array, &attr_info);
            });

        // Make sure the variable's type gets exported.
        typeEncoder.VisitQualTypeOf(T, def);

        return true;
    }

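    /*
     Represents a struct/union
     Children:
     - canonical declarations of the fields, enums and records declared
       inside the definition (none when there is no definition)
     Extras (canonical declaration):
     - name as string, or null when the name is empty
     - boolean, true when a definition is present
     - array of attribute names
     - maximum alignment from attributes divided by 8, or null when it is 0
     - max field alignment (pragma pack) divided by 8, or null when absent
     - platform specific size of the record
     - platform specific alignment of the record
     Non-canonical declarations are emitted as placeholders whose only extra
     is the array of attribute names.
     */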
    bool VisitRecordDecl(RecordDecl *D) {
        if (!D->isCanonicalDecl()) {
            // Emit non-canonical decl so we have a placeholder to attach comments to.
            // Attributes may also be attached to the non-canonical declaration so
            // we emit them too.
            std::vector<void *> childIds = {D->getCanonicalDecl()};
            encode_entry(D, TagNonCanonicalDecl, D->getLocation(), childIds, QualType(),
                [D](CborEncoder *local) {
                // 1. Attributes stored as an array of attribute names
                CborEncoder attrs;
                size_t attrs_n = D->hasAttrs() ? D->getAttrs().size() : 0;
                cbor_encoder_create_array(local, &attrs, attrs_n);
                for (auto a : D->attrs()) {
                    cbor_encode_text_stringz(&attrs, a->getSpelling());
                }
                cbor_encoder_close_container(local, &attrs);
            });
            return true;
        }

        auto def = D->getDefinition();

        // Size and alignment stay 0 when the record has no definition. They
        // are held as 64-bit values so that the layout quantities are not
        // narrowed to `int` before being encoded as unsigned integers.
        uint64_t recordAlignment = 0;
        uint64_t byteSize = 0;

        auto loc = D->getLocation();
        std::vector<void *> childIds;
        if (def) {
            for (auto decl : def->decls()) {
                auto kind = decl->getKind();
                // Note: We skip `Decl::Kind::IndirectField`.
                if (kind == Decl::Kind::Field
                    || kind == Decl::Kind::Enum
                    || kind == Decl::Kind::Record) {
                    childIds.push_back(decl->getCanonicalDecl());
                }
            }

            // Since the RecordDecl D isn't the complete definition,
            // the actual location should be given. This should handle opaque
            // types.
            loc = def->getLocation();

            const ASTRecordLayout &layout =
                this->Context->getASTRecordLayout(def);
            recordAlignment =
                static_cast<uint64_t>(layout.getAlignment().getQuantity());
            byteSize = static_cast<uint64_t>(layout.getSize().getQuantity());
        }

        // Anything that is not a struct is exported with the union tag.
        auto tag = D->isStruct() ? TagStructDecl : TagUnionDecl;

        encode_entry(
            D, tag, loc, childIds, QualType(),
            [D, def, recordAlignment, byteSize](CborEncoder *local) {
                // 1. Encode name or null
                auto name = D->getNameAsString();
                if (name.empty()) {
                    cbor_encode_null(local);
                } else {
                    cbor_encode_string(local, name);
                }

                // 2. Boolean true when definition present
                cbor_encode_boolean(local, !!def);

                // 3. Attributes stored as an array of attribute names
                CborEncoder attrs;
                size_t attrs_n = D->hasAttrs() ? D->getAttrs().size() : 0;
                cbor_encoder_create_array(local, &attrs, attrs_n);
                for (auto a : D->attrs()) {
                    cbor_encode_text_stringz(&attrs, a->getSpelling());
                }
                cbor_encoder_close_container(local, &attrs);

                // 4. Encode manually specified alignment
                auto align = D->getMaxAlignment();
                if (align == 0) {
                    cbor_encode_null(local);
                } else {
                    cbor_encode_uint(local, align / 8);
                }

                // 5. Encode pragma pack(n)
                if (auto const mfaa = D->getAttr<MaxFieldAlignmentAttr>()) {
                    cbor_encode_uint(local, mfaa->getAlignment() / 8);
                } else {
                    cbor_encode_null(local);
                }

                // 6. Encode the platform specific size of this record
                cbor_encode_uint(local, byteSize);

                // 7. Encode the platform specific alignment of this record
                cbor_encode_uint(local, recordAlignment);
            });

        return true;
    }

    // Exports an enum declaration.
    // Children: the canonical declarations of its enumerators.
    // Type: the enum's integer type, which is exported as well.
    // Extras: the enum's name, or null when the name is empty.
    bool VisitEnumDecl(EnumDecl *D) {
        // Unlike struct or union, there are no forward-declared enums in ISO C.
        // They are used in actual code and accepted by compilers, so we cannot
        // exit early via code like `if (!D->isCompleteDefinition()) return true;`.

        std::vector<void *> childIds;
        for (auto x : D->enumerators()) {
            childIds.push_back(x->getCanonicalDecl());
        }

        auto underlying_type = D->getIntegerType();
        typeEncoder.VisitQualTypeOf(underlying_type, D);

        encode_entry(D, TagEnumDecl, childIds, underlying_type,
                     [D](CborEncoder *local) {
                         auto name = D->getNameAsString();
                         if (name.empty()) {
                             cbor_encode_null(local);
                         } else {
                             cbor_encode_string(local, name);
                         }
                     });

        return true;
    }

    // Exports an enumerator. It has no children; the extras are its name,
    // the signedness of its value, and the value itself.
    bool VisitEnumConstantDecl(EnumConstantDecl *D) {
        std::vector<void *> childIds;

        if (!D->isCanonicalDecl()) {
            // Emit non-canonical decl so we have a placeholder to attach
            // comments to; it points at the canonical declaration.
            childIds.push_back(D->getCanonicalDecl());
            encode_entry(D, TagNonCanonicalDecl, D->getLocation(), childIds,
                         QualType());
            return true;
        }

        // The init expression is deliberately not exported as a child.
        auto encodeEnumerator = [D](CborEncoder *extras) {
            cbor_encode_string(extras, D->getNameAsString());

            auto initValue = D->getInitVal();
            const bool isSigned = initValue.isSigned();
            cbor_encode_boolean(extras, isSigned);
            if (isSigned) {
                cbor_encode_int(extras, initValue.getSExtValue());
            } else {
                cbor_encode_uint(extras, initValue.getZExtValue());
            }
        };

        encode_entry(D, TagEnumConstantDecl, childIds, QualType(),
                     encodeEnumerator);
        return true;
    }

    // Exports a struct/union field. It has no children; the extras are the
    // field name, the bitfield width (or null), the field's bit offset in
    // its record, and the size in bits of the field's type.
    bool VisitFieldDecl(FieldDecl *D) {
        auto fieldType = D->getType();

        if (!D->isCanonicalDecl()) {
            // Emit non-canonical decl so we have a placeholder to attach
            // comments to; it points at the canonical declaration.
            std::vector<void *> canonicalOnly;
            canonicalOnly.push_back(D->getCanonicalDecl());
            encode_entry(D, TagNonCanonicalDecl, D->getLocation(),
                         canonicalOnly, fieldType);
            typeEncoder.VisitQualTypeOf(fieldType, D);
            return true;
        }

        // Look the field up in the layout of the record that contains it.
        const ASTRecordLayout &recordLayout =
            this->Context->getASTRecordLayout(D->getParent());
        auto bitOffset = recordLayout.getFieldOffset(D->getFieldIndex());
        auto bitWidth = this->Context->getTypeSize(fieldType);

        auto encodeField = [D, this, bitOffset, bitWidth](CborEncoder *extras) {
            // 1. Field name
            cbor_encode_string(extras, D->getNameAsString());

            // 2. Bitfield width, or null for an ordinary field
            if (!D->isBitField()) {
                cbor_encode_null(extras);
            } else {
#if LLVM_VERSION_MAJOR >= 20
                const auto bitWidthValue = D->getBitWidthValue();
#else
                const auto bitWidthValue = D->getBitWidthValue(*this->Context);
#endif
                cbor_encode_uint(extras, bitWidthValue);
            }

            // 3. Bit offset of the field in its record
            cbor_encode_uint(extras, bitOffset);

            // 4. Full bit width of the field's type (even for a bitfield)
            cbor_encode_uint(extras, bitWidth);
        };

        std::vector<void *> childIds;
        encode_entry(D, TagFieldDecl, childIds, fieldType, encodeField);

        // This might be the only occurrence of this type in the translation
        // unit, so export it from here.
        typeEncoder.VisitQualTypeOf(fieldType, D);

        return true;
    }

    // Exports a typedef declaration.
    // Type: the underlying type, which is exported as well.
    // Extras, in order: name, whether the declaration is implicit, and the
    // name of the reserved (`__`-prefixed) macro the underlying type was
    // spelled with, or null when there is none.
    bool VisitTypedefNameDecl(TypedefNameDecl *D) {
        auto typeForDecl = D->getUnderlyingType();

        if (!D->isCanonicalDecl()) {
            // Emit non-canonical decl so we have a placeholder to attach comments to
            std::vector<void *> childIds = {D->getCanonicalDecl()};
            encode_entry(D, TagNonCanonicalDecl, D->getLocation(), childIds, typeForDecl);
            typeEncoder.VisitQualTypeOf(typeForDecl, D);
            return true;
        }

        // If this typedef is to a compiler-builtin macro with target-dependent definition, note the
        // macro's name so we can map to the appropriate target-independent name (e.g. `size_t`).
        // This is only needed for the canonical entry, so it is computed
        // after the early return above.
        auto targetDependentMacro = [&]() -> std::optional<std::string> {
            TypeSourceInfo *TSI = D->getTypeSourceInfo();
            if (!TSI) {
                return std::nullopt;
            }

            TypeLoc typeLoc = TSI->getTypeLoc();
            SourceLocation loc = typeLoc.getBeginLoc();

            if (loc.isInvalid()) {
                return std::nullopt;
            }

            // Check if the location is from a macro expansion
            if (!loc.isMacroID()) {
                return std::nullopt;
            }

            auto macroName = Lexer::getImmediateMacroName(loc, Context->getSourceManager(), Context->getLangOpts());
            // Double-underscore indicates that name is reserved for the implementation,
            // so this should not interfere with user code.
#if CLANG_VERSION_MAJOR >= 18
            if (!macroName.starts_with("__")) {
#else
            if (!macroName.startswith("__")) {
#endif // CLANG_VERSION_MAJOR
                return std::nullopt;
            }
            return std::make_optional(std::string(macroName));
        }();

        std::vector<void *> childIds;
        encode_entry(D, TagTypedefDecl, childIds, typeForDecl,
                     [D, targetDependentMacro](CborEncoder *array) {
                         auto name = D->getNameAsString();
                         cbor_encode_string(array, name);

                         cbor_encode_boolean(array, D->isImplicit());

                         if (targetDependentMacro) {
                             // Pass the stored string itself rather than
                             // rebuilding one from its character data.
                             cbor_encode_string(array, *targetDependentMacro);
                         } else {
                             cbor_encode_null(array);
                         }
                     });

        typeEncoder.VisitQualTypeOf(typeForDecl, D);

        return true;
    }

    //
    // Literals
    //

    // Exports an integer literal. It has no children; the extras are the
    // value (limited to 64 bits) and the base it was written in, guessed
    // from the literal's spelling in the source buffer.
    bool VisitIntegerLiteral(IntegerLiteral *IL) {
        const char *spelling =
            Context->getSourceManager().getCharacterData(IL->getLocation());
        auto value = IL->getValue().getLimitedValue();

        // Zero and anything not starting with '0' count as decimal; a leading
        // "0x"/"0X" means hexadecimal; any other leading '0' means octal.
        auto base = 10U;
        if (value != 0 && spelling[0] == '0') {
            if (spelling[1] == 'x' || spelling[1] == 'X') {
                base = 16U;
            } else {
                base = 8U;
            }
        }

        auto encodeLiteral = [value, base](CborEncoder *extras) {
            cbor_encode_uint(extras, value);
            cbor_encode_uint(extras, base);
        };

        std::vector<void *> childIds;
        encode_entry(IL, TagIntegerLiteral, childIds, encodeLiteral);
        return true;
    }

#if CLANG_VERSION_MAJOR >= 7
    // Fixed point literals are not exported: this only reports a warning
    // diagnostic for the node and writes no entry for it.
    // Returns true (for a RecursiveASTVisitor this presumably means "keep
    // traversing" -- the class header is outside this view).
    bool VisitFixedPointLiteral(FixedPointLiteral *L) {
        printDiag(Context, DiagnosticsEngine::Warning, "Encountered unsupported fixed point literal", L);
        return true;
    }
#endif // CLANG_VERSION_MAJOR

    // Imaginary literals are not exported: this only reports a warning
    // diagnostic for the node and writes no entry for it.
    // Returns true (for a RecursiveASTVisitor this presumably means "keep
    // traversing" -- the class header is outside this view).
    bool VisitImaginaryLiteral(ImaginaryLiteral *L) {
        printDiag(Context, DiagnosticsEngine::Warning, "Encountered unsupported imaginary literal", L);
        return true;
    }

    // Export a character literal as a single unsigned field holding the
    // value reported by CharacterLiteral::getValue().
    bool VisitCharacterLiteral(CharacterLiteral *L) {
        const auto charValue = L->getValue();

        std::vector<void *> noChildren;
        encode_entry(L, TagCharacterLiteral, noChildren,
                     [charValue](CborEncoder *array) {
                         cbor_encode_uint(array, charValue);
                     });
        return true;
    }

    // Export a string literal as three fields, in this order:
    //   1. a StringTypeTag describing the literal's kind,
    //   2. the byte width of one character of the literal,
    //   3. the literal's contents as a CBOR byte string.
    bool VisitStringLiteral(clang::StringLiteral *SL) {
        std::vector<void *> childIds;
        encode_entry(SL, TagStringLiteral, childIds, [SL](CborEncoder *array) {
            // C and C++ support different string types, so
            // we need to identify the string literal type.
            //
            // The enumerators are spelled differently depending on the Clang
            // major version, hence the preprocessor branches around each
            // case label.
            //
            // NOTE(review): there is no default case, so a kind that is not
            // listed here would emit no kind tag and the entry would be one
            // field short -- confirm whether the targeted Clang versions
            // have kinds beyond these.
            switch (SL->getKind()) {
#if CLANG_VERSION_MAJOR >= 18
            case clang::StringLiteralKind::Ordinary:
#elif CLANG_VERSION_MAJOR >= 15
            case clang::StringLiteral::StringKind::Ordinary:
#else
            case clang::StringLiteral::StringKind::Ascii:
#endif // CLANG_VERSION_MAJOR
                cbor_encode_uint(array, StringTypeTag::TagAscii);
                break;
#if CLANG_VERSION_MAJOR >= 18
            case clang::StringLiteralKind::Wide:
#else
            case clang::StringLiteral::StringKind::Wide:
#endif // CLANG_VERSION_MAJOR
                cbor_encode_uint(array, StringTypeTag::TagWide);
                break;
#if CLANG_VERSION_MAJOR >= 18
            case clang::StringLiteralKind::UTF8:
#else
            case clang::StringLiteral::StringKind::UTF8:
#endif // CLANG_VERSION_MAJOR
                cbor_encode_uint(array, StringTypeTag::TagUTF8);
                break;
#if CLANG_VERSION_MAJOR >= 18
            case clang::StringLiteralKind::UTF16:
#else
            case clang::StringLiteral::StringKind::UTF16:
#endif // CLANG_VERSION_MAJOR
                cbor_encode_uint(array, StringTypeTag::TagUTF16);
                break;
#if CLANG_VERSION_MAJOR >= 18
            case clang::StringLiteralKind::UTF32:
#else
            case clang::StringLiteral::StringKind::UTF32:
#endif // CLANG_VERSION_MAJOR
                cbor_encode_uint(array, StringTypeTag::TagUTF32);
                break;
            // The Unevaluated kind is only handled for Clang 17 and newer.
#if CLANG_VERSION_MAJOR >= 17
#if CLANG_VERSION_MAJOR >= 18
            case clang::StringLiteralKind::Unevaluated:
#else // CLANG_VERSION_MAJOR >= 17
            case clang::StringLiteral::StringKind::Unevaluated:
#endif
                cbor_encode_uint(array, StringTypeTag::TagUnevaluated);
                break;
#endif // CLANG_VERSION_MAJOR
            }
            // The size of the wchar_t type in C is implementation defined,
            // so the width of one character is exported explicitly.
            cbor_encode_uint(array, SL->getCharByteWidth());

            // String literals can contain arbitrary bytes, so
            // we encode these as byte strings rather than text.

            const uint8_t *bytes =
                reinterpret_cast<const uint8_t *>(SL->getBytes().data());
            cbor_encode_byte_string(array, bytes, SL->getByteLength());
        });
        return true;
    }

    // Export a floating literal as two fields: its value approximated as a
    // double, and the lexeme that matchFloatingLiteral() extracts from the
    // literal's spelling in the source buffer.
    bool VisitFloatingLiteral(clang::FloatingLiteral *L) {
        const char *spelling =
            Context->getSourceManager().getCharacterData(L->getLocation());
        const auto lexeme = matchFloatingLiteral(spelling);
        const auto approximation = L->getValueAsApproximateDouble();

        std::vector<void *> noChildren;
        encode_entry(L, TagFloatingLiteral, noChildren,
                     [approximation, &lexeme](CborEncoder *array) {
                         cbor_encode_double(array, approximation);
                         cbor_encode_string(array, lexeme);
                     });
        return true;
    }

  private:
    // Inspired by a lambda function within `clang/lib/Sema/SemaType.cpp`.
    //
    // Returns true when `D` or `T` looks like a va_list, i.e. when any of
    // the following holds:
    //   * `D` is a record declaration named `__va_list_tag`;
    //   * `T` is a typedef whose chain of underlying typedefs contains the
    //     target's builtin va_list declaration or a typedef named `va_list`;
    //   * the pointee / array element type of `T` is a record named
    //     `__va_list_tag`.
    bool isVaList(Decl *D, QualType T) {
        const auto namedVaListTag = [](const IdentifierInfo *id) {
            return id != nullptr && id->isStr("__va_list_tag");
        };

        if (auto *RD = dyn_cast<RecordDecl>(D)) {
            if (namedVaListTag(RD->getIdentifier()))
                return true;
        }

        if (T.isNull())
            return false;

        if (auto *TD = T->getAs<TypedefType>()) {
            auto *builtinVaList = Context->getBuiltinVaListDecl();
            // Walk the typedef chain one level of sugar at a time.
            for (; TD != nullptr; TD = TD->desugar()->getAs<TypedefType>()) {
                auto *typedefDecl = TD->getDecl();
                if (typedefDecl == builtinVaList)
                    return true;

                auto *id = typedefDecl->getIdentifier();
                if (id != nullptr && id->isStr("va_list"))
                    return true;
            }
        }

        if (auto *RT = T->getPointeeOrArrayElementType()->getAs<RecordType>())
            return namedVaListTag(RT->getDecl()->getIdentifier());

        return false;
    }
};

void TypeEncoder::VisitEnumType(const EnumType *T) {
    auto ed = T->getDecl()->getDefinition();
    encodeType(T, TagEnumType, [T, ed](CborEncoder *local) {
        cbor_encode_uint(local, uintptr_t(ed));
    });

    if (ed != nullptr) astEncoder->TraverseDecl(ed);
}

void TypeEncoder::VisitRecordType(const RecordType *T) {

    // Should only ever be reached during the first pass
    if (T->isSugared()) {
        auto qt = T->desugar();
        sugared->emplace((void *)T, qt);
        VisitQualType(qt);
    }

    auto tag = T->isStructureType() ? TagStructType : TagUnionType;

    encodeType(T, tag, [T](CborEncoder *local) {
        cbor_encode_uint(local, uintptr_t(T->getDecl()->getCanonicalDecl()));
    });

    // record type might be anonymous and have no top-level declaration
    // structure declarations can reference themselves, so we need
    // a way to guard against unbounded recursion.
    clang::RecordDecl *D = T->getDecl();
    if (recordDeclsUnderVisit.emplace(D).second) {
        astEncoder->TraverseDecl(D);
        recordDeclsUnderVisit.erase(D);
    }
}

void TypeEncoder::VisitTypedefType(const TypedefType *T) {

    auto D = T->getDecl()->getCanonicalDecl();

    encodeType(T, TagTypedefType, [D](CborEncoder *local) {
        cbor_encode_uint(local, uintptr_t(D));
    });
    astEncoder->TraverseDecl(D);
}

void TypeEncoder::VisitVariableArrayType(const VariableArrayType *T) {
    auto t = T->getElementType();
    auto qt = encodeQualType(t);

    auto c = T->getSizeExpr();
    astEncoder->TraverseStmt(c);

    encodeType(T, TagVariableArrayType, [qt, c](CborEncoder *local) {
        cbor_encode_uint(local, qt);
        if (c) {
            cbor_encode_uint(local, uintptr_t(c));
        } else {
            // This case occurs when the expression omitted and * is used:
            // void a_function(int example[][*]);
            cbor_encode_null(local);
        }
    });

    VisitQualType(t);
}

void TypeEncoder::VisitAtomicType(const AtomicType *AT) {
  auto t = AT->getValueType();
  auto qt = encodeQualType(t);

  encodeType(AT, TagAtomicType, [qt](CborEncoder *local) {
      cbor_encode_uint(local, qt);
  });

  VisitQualType(t);
}

// AST consumer that serializes one translation unit to CBOR and stores the
// resulting bytes in `*outputs`, keyed by the real path of the input file.
//
// The encoded document is a 6-element array:
//   1. all reachable AST nodes and types (plus macros),
//   2. the identifiers of the exported top-level declarations,
//   3. the visited files,
//   4. the comments of the main file,
//   5. the target's builtin va_list kind,
//   6. the target triple.
class TranslateConsumer : public clang::ASTConsumer {
    Outputs *outputs;
    const std::string outfile;
    Preprocessor &PP;

  public:
    explicit TranslateConsumer(Outputs *outputs, llvm::StringRef InFile, Preprocessor &PP)
        : outputs(outputs), outfile(InFile.str()), PP(PP) {}

    // Called by Clang once the whole translation unit has been parsed.
    void HandleTranslationUnit(clang::ASTContext &Context) override {

        CborEncoder encoder;

        // There are some type nodes (see `TypedefType` and `RecordType`) which
        // can be "sugared". That means we should not follow the declarations we
        // normally would follow for those types, but we should use the
        // `desugared` type instead.
        //
        // The map lives outside `process` so that it persists when the
        // encoding pass below has to be repeated.
        std::unordered_map<void *, QualType> sugared;

        // One complete encoding pass into `buffer`. It builds a fresh
        // visitor every time, so it can be run again with a larger buffer.
        auto process = [&encoder, &Context, &sugared, this](uint8_t *buffer,
                                                            size_t len) {
            cbor_encoder_init(&encoder, buffer, len, 0);

            CborEncoder outer;
            cbor_encoder_create_array(&encoder, &outer, 6);

            CborEncoder array;

            // 1. Encode all of the reachable AST nodes and types
            cbor_encoder_create_array(&outer, &array, CborIndefiniteLength);
            TranslateASTVisitor visitor(&Context, &array, &sugared, PP);
            auto translation_unit = Context.getTranslationUnitDecl();
            visitor.TraverseDecl(translation_unit);
            visitor.encodeMacros();
            cbor_encoder_close_container(&outer, &array);

            // 2. Track all of the top-level declarations
            cbor_encoder_create_array(&outer, &array, CborIndefiniteLength);
            for (auto d : translation_unit->decls()) {
                if (!d->isCanonicalDecl() && isa<VarDecl>(d)) {
                    auto canonical_decl = d->getCanonicalDecl();
                    auto var_decl = cast<VarDecl>(canonical_decl);

                    // Non-Canonical Decls which don't have an extern local canonical decl
                    // should be skipped
                    if (!(var_decl->isExternC() && var_decl->isLocalVarDecl())) {
                        continue;
                    }
                }

                // Empty-decls aren't exported. This avoids warnings during conversion.
                if (isa<EmptyDecl>(d)) {
                    continue;
                }

                cbor_encode_uint(&array, reinterpret_cast<std::uintptr_t>(d));
            }
            cbor_encoder_close_container(&outer, &array);

            // 3. Encode all of the visited file names
            auto files = visitor.getFiles();
            cbor_encoder_create_array(&outer, &array, files.size());
            for (auto const &file : files) {
                CborEncoder entry;
                cbor_encoder_create_array(&array, &entry, 2);
                cbor_encode_string(&entry, file.first);
                if (file.second.isValid()) {
                    CborEncoder locEntry;
                    cbor_encoder_create_array(&entry, &locEntry, 3);
                    visitor.encodeSourcePos(&locEntry, file.second);
                    cbor_encoder_close_container(&entry, &locEntry);
                } else {
                    cbor_encode_null(&entry);
                }
                cbor_encoder_close_container(&array, &entry);
            }
            cbor_encoder_close_container(&outer, &array);

            // 4. Emit comments as array of arrays. Each comment is represented
            // as an array of source position followed by comment string.
            //
            // Getting all comments requires -fparse-all-comments (see
            // augment_argv())!
            const SourceManager& sourceMgr = Context.getSourceManager();
#if CLANG_VERSION_MAJOR < 10
            auto comments = Context.getRawCommentList().getComments();
            cbor_encoder_create_array(&outer, &array, comments.size());
            for (auto comment : comments) {
                CborEncoder entry;
                cbor_encoder_create_array(&array, &entry, 4);
#if CLANG_VERSION_MAJOR < 8
                SourceLocation loc = comment->getLocStart();
#else // 7 < CLANG_VERSION_MAJOR < 10
                SourceLocation loc = comment->getBeginLoc();
#endif // CLANG_VERSION_MAJOR < 8
                visitor.encodeSourcePos(&entry, loc); // emits 3 values
                auto raw_text = comment->getRawText(sourceMgr);
                cbor_encode_byte_string(&entry, raw_text.bytes_begin(),
                                        raw_text.size());
                cbor_encoder_close_container(&array, &entry);
            }
#else  // CLANG_VERSION_MAJOR >= 10
            const FileID file = sourceMgr.getMainFileID();
            auto comments = Context.Comments.getCommentsInFile(file);
            if (comments != nullptr) {
                cbor_encoder_create_array(&outer, &array, comments->size());
                for (auto comment : *comments) {
                    CborEncoder entry;
                    cbor_encoder_create_array(&array, &entry, 4);
                    SourceLocation loc = comment.second->getBeginLoc();
                    visitor.encodeSourcePos(&entry, loc); // emits 3 values
                    auto raw_text = comment.second->getRawText(sourceMgr);
                    cbor_encode_byte_string(&entry, raw_text.bytes_begin(),
                                            raw_text.size());
                    cbor_encoder_close_container(&array, &entry);
                }
            } else {
                // this happens when the C file contains no comments
                cbor_encoder_create_array(&outer, &array, 0);
            }
#endif // CLANG_VERSION_MAJOR >= 10
            cbor_encoder_close_container(&outer, &array);

            // 5. Target VaList type as BuiltinVaListKind
            cbor_encode_uint(&outer, static_cast<std::uintptr_t>(Context.getTargetInfo().getBuiltinVaListKind()));

            // 6. Target triple
            auto target = Context.getTargetInfo().getTriple().str();
            cbor_encode_string(&outer, target);

            cbor_encoder_close_container(&encoder, &outer);
        };

        // A very large C file (SQLite amalgamation) produces a 18MB CBOR file.
        // Allocate a conservatively large buffer. On most operating systems,
        // the kernel just reserves the virtual address space and allocates
        // physical pages lazily on demand.
        std::vector<uint8_t> buf(64 * 1024 * 1024);

        process(buf.data(), buf.size());

        // If the buffer was too small, tinycbor reports how many extra bytes
        // it needed. The written size is only meaningful after an encode that
        // finished without running out of space, so enlarge the buffer by the
        // shortfall and encode again instead of relying on an assert that
        // disappears in NDEBUG builds.
        for (auto needed = cbor_encoder_get_extra_bytes_needed(&encoder);
             needed != 0;
             needed = cbor_encoder_get_extra_bytes_needed(&encoder)) {
            buf.resize(buf.size() + needed);
            process(buf.data(), buf.size());
        }

        auto written = cbor_encoder_get_buffer_size(&encoder, buf.data());
        buf.resize(written);
        buf.shrink_to_fit();

        (*outputs)[make_realpath(outfile)] = std::move(buf);
    }
};

// Frontend action that attaches a TranslateConsumer to C inputs. For any
// other input language no consumer is created (nullptr is returned).
class TranslateAction : public clang::ASTFrontendAction {
    Outputs *outputs;

  public:
    TranslateAction(Outputs *outputs) : outputs(outputs) {}

    std::unique_ptr<clang::ASTConsumer>
    CreateASTConsumer(clang::CompilerInstance &Compiler,
                      llvm::StringRef InFile) override {

        // The language enumeration moved between Clang 9 and Clang 10.
#if CLANG_VERSION_MAJOR >= 10
        const bool inputIsC =
            this->getCurrentFileKind().getLanguage() == Language::C;
#else
        const bool inputIsC = this->getCurrentFileKind().getLanguage() ==
                              InputKind::Language::C;
#endif // CLANG_VERSION_MAJOR
        if (!inputIsC) {
            return nullptr;
        }

        return std::make_unique<TranslateConsumer>(outputs, InFile,
                                                   Compiler.getPreprocessor());
    }
};

// Apply a custom category to all command-line options so that they are the
// only ones displayed. This category is handed to CommonOptionsParser in
// process().
static llvm::cl::OptionCategory MyToolCategory("my-tool options");

// Number of elements in a built-in array, known at compile time.
// (The standard library gained an equivalent, std::size, in C++17.)
template <class Element, size_t Count>
constexpr size_t size(const Element (&/*array*/)[Count]) noexcept {
    return Count;
}

// We augment the command line arguments to ensure that comments are always
// parsed and string literals are always treated as constant.
//
// Returns a new argument vector laid out as:
//   the `argc` entries of `argv`, the fixed extra arguments below, a
//   `-resource-dir` extra argument, and a terminating nullptr.
//
// The returned vector only borrows its strings: the original arguments stay
// owned by the caller, and the remaining entries have static storage
// duration, so nothing has to be freed.
static std::vector<const char *> augment_argv(int argc, const char *argv[]) {
    const char *const extras[] = {
        "-extra-arg=-fno-builtin-strlen",  // builtin strlen wrongly returns
                                           // unsigned long despite declaration
        "-extra-arg=-fparse-all-comments", // always parse comments
        "-extra-arg=-Wwrite-strings",      // string literals are constant
        "-extra-arg=-D_FORTIFY_SOURCE=0",  // we don't want to use checked
                                           // versions of libc. without this we
        // get calls to __builtin__memcpy_chk,
        // etc.

        // Also #define C2RUST, so examples can conditionally omit C code that
        // needs special handling in the Rust version (e.g., varargs functions)
        "-extra-arg=-DC2RUST=1",
    };

    // Build a -resource-dir argument based on the path to the linked clang
    // installation. Without this, ClangTool builds the resource directory from
    // the path to the tool (in this case, the binary running the AST Exporter).
    //
    // The argument only depends on compile-time constants, so it is built
    // once and kept in a function-local static. This replaces a heap copy
    // that was allocated on every call and never released.
    static const std::string resource_dir = [] {
        SmallString<128> P("-extra-arg=-resource-dir=" CLANG_BIN_PATH);
        llvm::sys::path::append(P, "..", Twine("lib") + CLANG_LIBDIR_SUFFIX,
                                "clang", std::to_string(CLANG_VERSION_MAJOR));
        return P.str().str();
    }();

    auto argv_ = std::vector<const char *>();
    argv_.reserve(argc + size(extras) + 2);

    auto pusher = std::back_inserter(argv_);
    std::copy_n(argv, argc, pusher);
    std::copy_n(extras, size(extras), pusher);
    *pusher++ = resource_dir.c_str();
    *pusher++ =
        nullptr; // The value of argv[argc] is guaranteed to be a null pointer.

    return argv_;
}

// Factory handed to ClangTool::run(); every action it creates is a
// TranslateAction that writes into the same Outputs map.
class MyFrontendActionFactory : public FrontendActionFactory {
    Outputs *sink;

  public:
    MyFrontendActionFactory(Outputs *outputs) : sink(outputs) {}

    // The return type of FrontendActionFactory::create() changed from a raw
    // pointer to a unique_ptr in Clang 10.
#if CLANG_VERSION_MAJOR >= 10
    std::unique_ptr<FrontendAction> create() override {
        return std::make_unique<TranslateAction>(sink);
    }
#else
    clang::FrontendAction *create() override {
        return new TranslateAction(sink);
    }
#endif // CLANG_VERSION_MAJOR
};

// Marshal the output map into something easy to manipulate in Rust.
//
// Allocates a new ExportResult sized to hold one slot per map entry. For
// each entry, the slot receives a freshly allocated NUL-terminated copy of
// the name, a freshly allocated copy of the bytes, and the byte count.
ExportResult *make_export_result(const Outputs &outputs) {
    auto *exported = new ExportResult;
    exported->resize(outputs.size());

    std::size_t slot = 0;
    for (auto const &[path, cbor] : outputs) {
        // c_str() is NUL-terminated, so copying size() + 1 characters
        // brings the terminator along.
        const std::size_t pathLength = path.size() + 1;
        auto *pathCopy = new char[pathLength];
        std::copy_n(path.c_str(), pathLength, pathCopy);
        exported->names[slot] = pathCopy;

        auto *cborCopy = new uint8_t[cbor.size()];
        std::copy(cbor.begin(), cbor.end(), cborCopy);
        exported->bytes[slot] = cborCopy;
        exported->sizes[slot] = cbor.size();

        ++slot;
    }

    return exported;
}

// Extract clang AST for the source file specified in the argument vector.
// Note: The arguments should only reference one source file at a time.
//
// `argc`/`argv` are the tool arguments; they are extended by augment_argv()
// before being parsed. `*result` receives the return value of
// ClangTool::run(), or 1 when the command line could not be parsed.
// Returns the map of exported outputs, which is empty when the command line
// could not be parsed.
Outputs process(int argc, const char *argv[], int *result) {
    auto argv_ = augment_argv(argc, argv);
    int argc_ = argv_.size() - 1; // ignore the extra nullptr

#if CLANG_VERSION_MAJOR < 13
    CommonOptionsParser OptionsParser(argc_, argv_.data(), MyToolCategory);
#else
    Expected<CommonOptionsParser> parseResult =
        CommonOptionsParser::create(argc_, argv_.data(), MyToolCategory);
    if (auto err = parseResult.takeError()) {
        logAllUnhandledErrors(std::move(err), errs(), "[Parse Error] ");
        assert(0 && "Failed to parse command line options");
        // With assertions compiled out, execution would otherwise continue
        // and dereference an Expected that holds an error. Report the
        // failure to the caller instead.
        *result = 1;
        return Outputs();
    }
    CommonOptionsParser& OptionsParser = *parseResult;
#endif

    // the logic below assumes we're only translating one source file
    static size_t source_path_count = 0;
    source_path_count++;
    const size_t num_sources = OptionsParser.getSourcePathList().size();
    assert(
        (num_sources == 1 // newer clang versions
        || num_sources == source_path_count // older clang versions
        ) && "Expected exactly one source path"
    );

    // CommonOptionsParser is stateful so the vector returned by
    // getSourcePathList() includes paths from past invocations.
    std::string sourcePath = OptionsParser.getSourcePathList().back();
    // Make a new list with just the file we're currently translating
    std::vector<std::string> sourcePathList(1, sourcePath);
    ClangTool Tool(OptionsParser.getCompilations(), sourcePathList);

    Outputs outputs;
    MyFrontendActionFactory myFrontendActionFactory(&outputs);

    *result = Tool.run(&myFrontendActionFactory);
    assert(outputs.size() == 1 && "Expected exactly one output.");
    return outputs;
}

// AST exporter library interface.
extern "C" {
// Export the AST of the source file named in `argv` and return the
// marshalled result. A non-zero `debug` turns on LLVM debug output for this
// tool; the flag only has an effect when NDEBUG is not defined.
ExportResult *ast_exporter(int argc, const char *argv[], int debug) {
#ifndef NDEBUG
    if (debug != 0) {
        llvm::DebugFlag = true;
        llvm::setCurrentDebugType(DEBUG_TYPE);
    }
#endif // NDEBUG

    // The status reported by process() is not passed on to the caller.
    int status = 0;
    const Outputs exported = process(argc, argv, &status);
    return make_export_result(exported);
}

// Destroy a result previously returned by ast_exporter().
void drop_export_result(ExportResult *result) { delete result; }

// Version string of the Clang this library was compiled against. The empty
// literal in front only compiles if CLANG_VERSION_STRING is a string literal.
const char *clang_version() { return "" CLANG_VERSION_STRING; }
}
